In [1]:
import pandas as pd
import json
import ast
import glob
import os
from itables import show

In [2]:
folder_path = 'Pneuf_IRR'

# List of CSV files
csv_files = glob.glob(os.path.join(folder_path,'IRR_pixel_counts_HUC12_*.csv'))  # adjust path/pattern

# Load and combine
df_list = [pd.read_csv(f) for f in csv_files]
df = pd.concat(df_list, ignore_index=True)

In [3]:
df

Unnamed: 0,Year,huc12,histogram
0,2000,170402080608,"{1=95554.7607843138, 0=220730.35294117668}"
1,2000,170402080102,"{1=3355.337254901961, 0=151585.14901960822}"
2,2000,170402080104,"{1=5103.78039215686, 0=108510.0862745099}"
3,2000,170402080106,"{1=6720.388235294119, 0=98739.91764705897}"
4,2000,170402080107,"{1=10033.682352941178, 0=130333.56078431397}"
...,...,...,...
1099,2022,170402080401,"{1=31320.239215686295, 0=141360.3529411766}"
1100,2022,170402080403,"{1=17214.88235294117, 0=110771.25882352926}"
1101,2022,170402080205,"{1=8666.803921568626, 0=90461.1725490196}"
1102,2022,170402080402,"{1=12675.980392156871, 0=101503.48627450978}"


In [4]:
def parse_histogram(s):
    if pd.isna(s) or s.strip() == '':
        return {}
    s_fixed = s.replace('=', ':')
    return ast.literal_eval(s_fixed)

df['histogram_dict'] = df['histogram'].apply(parse_histogram)

In [5]:
df

Unnamed: 0,Year,huc12,histogram,histogram_dict
0,2000,170402080608,"{1=95554.7607843138, 0=220730.35294117668}","{1: 95554.7607843138, 0: 220730.35294117668}"
1,2000,170402080102,"{1=3355.337254901961, 0=151585.14901960822}","{1: 3355.337254901961, 0: 151585.14901960822}"
2,2000,170402080104,"{1=5103.78039215686, 0=108510.0862745099}","{1: 5103.78039215686, 0: 108510.0862745099}"
3,2000,170402080106,"{1=6720.388235294119, 0=98739.91764705897}","{1: 6720.388235294119, 0: 98739.91764705897}"
4,2000,170402080107,"{1=10033.682352941178, 0=130333.56078431397}","{1: 10033.682352941178, 0: 130333.56078431397}"
...,...,...,...,...
1099,2022,170402080401,"{1=31320.239215686295, 0=141360.3529411766}","{1: 31320.239215686295, 0: 141360.3529411766}"
1100,2022,170402080403,"{1=17214.88235294117, 0=110771.25882352926}","{1: 17214.88235294117, 0: 110771.25882352926}"
1101,2022,170402080205,"{1=8666.803921568626, 0=90461.1725490196}","{1: 8666.803921568626, 0: 90461.1725490196}"
1102,2022,170402080402,"{1=12675.980392156871, 0=101503.48627450978}","{1: 12675.980392156871, 0: 101503.48627450978}"


In [6]:
hist_df = pd.json_normalize(df['histogram_dict'])
hist_df.columns = [col for col in ['Irrigated', 'Non-irrigated']]

In [7]:
final_df = pd.concat([df[['Year', 'huc12']], hist_df], axis=1).sort_values('Year', ignore_index=True)

In [8]:
final_df

Unnamed: 0,Year,huc12,Irrigated,Non-irrigated
0,2000,170402080608,95554.760784,220730.352941
1,2000,170402080305,1006.000000,102179.956863
2,2000,170402080306,13404.929412,100732.074510
3,2000,170402080201,8135.592157,79288.772549
4,2000,170402080203,602.658824,50602.847059
...,...,...,...,...
1099,2023,170402080203,5032.207843,46173.298039
1100,2023,170402080201,19417.823529,68006.541176
1101,2023,170402080306,17349.015686,96787.988235
1102,2023,170402080301,5.737255,75117.894118


In [9]:
final_df.to_csv('IrrMapper.csv', index=False)