In [None]:
import pandas as pd
import json
import ast
import glob
import os

In [3]:
folder_path = 'Pneuf_NLCD'

# List of CSV files
csv_files = glob.glob(os.path.join(folder_path,'NLCD_pixel_counts_HUC12_*.csv'))  # adjust path/pattern

# Load and combine
df_list = [pd.read_csv(f) for f in csv_files]
df = pd.concat(df_list, ignore_index=True)

In [4]:
df

Unnamed: 0,Year,huc12,histogram
0,2019,170402080608,"{52=30066.25490196079, 11=63409.64313725485, 9..."
1,2019,170402080102,"{71=3897.3372549019623, 90=1614.0, 21=591.8980..."
2,2019,170402080104,"{71=90.0, 90=1950.6313725490197, 21=357.235294..."
3,2019,170402080106,"{71=1705.5529411764703, 90=450.0, 21=832.59999..."
4,2019,170402080107,"{71=1835.862745098039, 90=327.09019607843135, ..."
...,...,...,...
1099,2016,170402080401,"{95=2622.211764705883, 11=1.0, 52=66161.552941..."
1100,2016,170402080403,"{95=3262.698039215686, 11=65.0, 52=61137.82745..."
1101,2016,170402080205,"{71=573.6156862745098, 90=2.4705882352941178, ..."
1102,2016,170402080402,"{71=245.18039215686275, 90=327.0, 42=16840.321..."


In [5]:
def parse_histogram(s):
    if pd.isna(s) or s.strip() == '':
        return {}
    s_fixed = s.replace('=', ':')
    return ast.literal_eval(s_fixed)

df['histogram_dict'] = df['histogram'].apply(parse_histogram)

In [6]:
df

Unnamed: 0,Year,huc12,histogram,histogram_dict
0,2019,170402080608,"{52=30066.25490196079, 11=63409.64313725485, 9...","{52: 30066.25490196079, 11: 63409.64313725485,..."
1,2019,170402080102,"{71=3897.3372549019623, 90=1614.0, 21=591.8980...","{71: 3897.3372549019623, 90: 1614.0, 21: 591.8..."
2,2019,170402080104,"{71=90.0, 90=1950.6313725490197, 21=357.235294...","{71: 90.0, 90: 1950.6313725490197, 21: 357.235..."
3,2019,170402080106,"{71=1705.5529411764703, 90=450.0, 21=832.59999...","{71: 1705.5529411764703, 90: 450.0, 21: 832.59..."
4,2019,170402080107,"{71=1835.862745098039, 90=327.09019607843135, ...","{71: 1835.862745098039, 90: 327.09019607843135..."
...,...,...,...,...
1099,2016,170402080401,"{95=2622.211764705883, 11=1.0, 52=66161.552941...","{95: 2622.211764705883, 11: 1.0, 52: 66161.552..."
1100,2016,170402080403,"{95=3262.698039215686, 11=65.0, 52=61137.82745...","{95: 3262.698039215686, 11: 65.0, 52: 61137.82..."
1101,2016,170402080205,"{71=573.6156862745098, 90=2.4705882352941178, ...","{71: 573.6156862745098, 90: 2.4705882352941178..."
1102,2016,170402080402,"{71=245.18039215686275, 90=327.0, 42=16840.321...","{71: 245.18039215686275, 90: 327.0, 42: 16840...."


In [7]:
hist_df = pd.json_normalize(df['histogram_dict'])
hist_df.columns = [col for col in hist_df.columns]

In [8]:
hist_df

Unnamed: 0,52,11,95,31,71,24,90,42,82,81,23,41,22,21
0,30066.254902,63409.643137,6035.403922,148.545098,4104.729412,103.650980,18255.000000,88.356863,88375.454902,2226.435294,2336.349020,2.000000,11590.552941,4651.537255
1,87304.925490,,2309.462745,,3897.337255,,1614.000000,6408.439216,481.949020,405.000000,,10289.376471,18.200000,591.898039
2,47468.341176,,349.321569,,90.000000,,1950.631373,24594.956863,162.537255,3589.815686,,4569.066667,59.611765,357.235294
3,54972.835294,125.000000,338.098039,,1705.552941,,450.000000,3713.800000,5505.043137,3353.878431,,6065.615686,151.623529,832.600000
4,67600.776471,,399.180392,,1835.862745,,327.090196,5291.937255,19074.121569,1418.988235,,5265.133333,330.729412,1354.894118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,66161.552941,1.000000,2622.211765,,2941.862745,26.000000,60.000000,4184.235294,39900.384314,5004.725490,713.937255,4.000000,2866.007843,2633.145098
1100,61137.827451,65.000000,3262.698039,,1575.176471,1.000000,198.000000,7197.105882,10645.160784,5084.274510,203.352941,1753.000000,1100.431373,1894.419608
1101,42726.078431,1.000000,,,573.615686,,2.470588,10436.772549,16551.141176,,1.000000,641.109804,653.949020,1175.192157
1102,45872.556863,,68.686275,,245.180392,,327.000000,16840.321569,15964.513725,734.749020,2.000000,1609.952941,817.278431,1522.847059


In [9]:
land_cover_dict = {
    11: "Open Water",
    12: "Perennial Ice/Snow",
    21: "Developed, Open Space",
    22: "Developed, Low Intensity",
    23: "Developed, Medium Intensity",
    24: "Developed, High Intensity",
    31: "Barren Land",
    41: "Deciduous Forest",
    42: "Evergreen Forest",
    43: "Mixed Forest",
    52: "Shrub/Scrub",
    71: "Grassland/Herbaceous",
    81: "Pasture/Hay",
    82: "Cultivated Crops",
    90: "Woody Wetlands",
    95: "Emergent Herbaceous Wetlands"
}

In [10]:
def safe_rename(col):
    code = col
    if code in land_cover_dict:
        return f'{land_cover_dict[code].replace(" ", "_")}'
    else:
        return f'{code}'

hist_df.rename(columns=lambda c: safe_rename(c), inplace=True)

In [11]:
hist_df

Unnamed: 0,Shrub/Scrub,Open_Water,Emergent_Herbaceous_Wetlands,Barren_Land,Grassland/Herbaceous,"Developed,_High_Intensity",Woody_Wetlands,Evergreen_Forest,Cultivated_Crops,Pasture/Hay,"Developed,_Medium_Intensity",Deciduous_Forest,"Developed,_Low_Intensity","Developed,_Open_Space"
0,30066.254902,63409.643137,6035.403922,148.545098,4104.729412,103.650980,18255.000000,88.356863,88375.454902,2226.435294,2336.349020,2.000000,11590.552941,4651.537255
1,87304.925490,,2309.462745,,3897.337255,,1614.000000,6408.439216,481.949020,405.000000,,10289.376471,18.200000,591.898039
2,47468.341176,,349.321569,,90.000000,,1950.631373,24594.956863,162.537255,3589.815686,,4569.066667,59.611765,357.235294
3,54972.835294,125.000000,338.098039,,1705.552941,,450.000000,3713.800000,5505.043137,3353.878431,,6065.615686,151.623529,832.600000
4,67600.776471,,399.180392,,1835.862745,,327.090196,5291.937255,19074.121569,1418.988235,,5265.133333,330.729412,1354.894118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,66161.552941,1.000000,2622.211765,,2941.862745,26.000000,60.000000,4184.235294,39900.384314,5004.725490,713.937255,4.000000,2866.007843,2633.145098
1100,61137.827451,65.000000,3262.698039,,1575.176471,1.000000,198.000000,7197.105882,10645.160784,5084.274510,203.352941,1753.000000,1100.431373,1894.419608
1101,42726.078431,1.000000,,,573.615686,,2.470588,10436.772549,16551.141176,,1.000000,641.109804,653.949020,1175.192157
1102,45872.556863,,68.686275,,245.180392,,327.000000,16840.321569,15964.513725,734.749020,2.000000,1609.952941,817.278431,1522.847059


In [13]:
# compute total pixels per row
total_pixels = hist_df.sum(axis=1).replace(0, 1)  # avoid division by zero

# compute percentage per crop
percent_df = hist_df.div(total_pixels, axis=0).multiply(100)

# merge counts + percentages
final_pix_df = pd.concat([df[['Year', 'huc12']], hist_df], axis=1)
final_pcent_df = pd.concat([df[['Year', 'huc12']], percent_df], axis=1)

In [14]:
total_pixels

0       231393.913725
1       113320.588235
2        83191.517647
3        77214.047059
4       102898.713725
            ...      
1099    127119.062745
1100     94117.447059
1101     72762.329412
1102     84005.086275
1103    117493.952941
Length: 1104, dtype: float64

In [15]:
final_pcent_df.sort_values('Year', inplace=True)
final_pix_df.sort_values('Year', inplace=True)

In [16]:
final_pcent_df.to_csv('NLCD_percent.csv', index=False)
final_pix_df.to_csv('NLCD_pixel_count.csv', index=False)