In [47]:
import pandas as pd
import os

In [None]:
#################################################################
#     Filter the dates based on planting and harvesting date    #
#################################################################
loc_files = './soil'

wheat_df = pd.read_excel('sws_spring_wheat.xlsx',sheet_name='Sheet2')
for file_name in os.listdir(loc_files):
    if file_name.endswith('.xlsx'):
        # Load the datasets
        file_path = os.path.join(loc_files,file_name)
        loc_df = pd.read_excel(file_path)


        # Convert date columns to datetime format
        loc_df['date'] = pd.to_datetime(loc_df['date'])
        wheat_df['planting date'] = pd.to_datetime(wheat_df['planting date'])
        wheat_df['harvest date'] = pd.to_datetime(wheat_df['harvest date'])

        # Initialize an empty DataFrame to store the filtered results
        filtered_df = pd.DataFrame()
        location, extension = os.path.splitext(file_name)
        # Loop through each row of the growth data frame (sws_spring_wheat)
        for index, row in wheat_df.iterrows():
            if row['location'] == location:
                # Filter rows where the date is between the planting and harvest dates for the respective year
                mask = (loc_df['date'] >= row['planting date']) & (loc_df['date'] <= row['harvest date'])
                filtered_data = loc_df[mask]
                # Append the filtered data to the filtered_df
                filtered_df = pd.concat([filtered_df, filtered_data], ignore_index=True)
                
        output_file_path = os.path.join('./soil_p_h', f'{file_name}')
        filtered_df.to_excel(output_file_path, index=False)

In [None]:
### Adding drought indeces to the data, they are in 5-days resolution, in contrast with other data ###
file_path1 = './test'
file_path2 = './pdsi_p_h'
output_path = './p_h_with_strs'
excel_files = [f for f in os.listdir(file_path1) if f.endswith('.xlsx')]

for excel_f in excel_files:
    df1 = pd.read_excel(os.path.join(file_path1, excel_f))
    df2 = pd.read_excel(os.path.join(file_path2, excel_f))
    df1['date'] = pd.to_datetime(df1['date'])
    df2['date'] = pd.to_datetime(df2['date'])
    df1 = df1.merge(df2[['date', 'pdsi']], on = 'date', how = 'left')
    output_file = os.path.join(output_path, f'{os.path.splitext(excel_f)[0]}.xlsx')
    df1.to_excel(output_file, index = False)

In [None]:
#########################################
###    Calculation of wet day         ###
#########################################

def wet_day_cal(df):
    # Create a new column 'wet' where 1 indicates a wet day and 0 indicates a dry day
    df['wet'] = (df['precip'] > 0).astype(int)
    return df


directory_path = './p_h_with_strs'
output_directory = './p_h_with_strs'

for filename in os.listdir(directory_path):
    if filename.endswith('.xlsx'):
        try:
            file_path = os.path.join(directory_path, filename)
            data = pd.read_excel(file_path)

            # Apply calculations
            wet_day_cal(data)
            
            # Create a unique output file path
            output_file_path = os.path.join(output_directory, filename.replace('.csv', '.xlsx'))
            data.to_excel(output_file_path, index=False)
        except Exception as e:
            print(f"Error processing {filename}: {e}")

Unnamed: 0,date,doy,dap,stage,precip,tmax,tmin,Tavg,gdd,dgdd,...,srad,rmax,rmin,vs,ravg,vpd,cum_gdd,fdd,hdd,pdsi
0,2005-03-16,75,0,Emergence,1.3,17.05,4.95,11.000,11.000,0.000,...,125.3,58.6,20.8,10.4,39.70,0.89,11.000,0.00,0.00,-3.04
1,2005-03-17,76,1,Emergence,0.0,13.45,1.24,7.345,7.345,3.655,...,155.7,81.6,22.0,7.0,51.80,0.57,18.345,2.99,0.00,
2,2005-03-18,77,2,Emergence,0.0,14.05,0.55,7.300,7.300,0.045,...,159.1,74.1,33.4,5.5,53.75,0.59,25.645,3.68,0.00,
3,2005-03-19,78,3,Emergence,0.8,15.85,3.24,9.545,9.545,2.245,...,88.9,76.7,35.5,2.9,56.10,0.55,35.190,0.99,0.00,
4,2005-03-20,79,4,Emergence,2.1,18.45,6.35,12.400,12.400,2.855,...,170.2,100.0,31.4,7.0,65.70,0.83,47.590,0.00,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1821,2020-08-09,222,141,Beyond Maturity,0.0,28.65,13.65,21.150,21.150,0.300,...,298.9,57.3,20.6,3.0,38.95,1.85,2434.900,0.80,0.00,
1822,2020-08-10,223,142,Beyond Maturity,0.0,33.55,14.15,23.850,23.850,2.700,...,297.6,50.3,12.7,2.6,31.50,2.60,2458.750,0.30,3.55,
1823,2020-08-11,224,143,Beyond Maturity,0.0,32.85,17.25,25.050,25.050,1.200,...,300.5,39.0,12.1,5.4,25.55,2.74,2483.800,0.00,2.85,
1824,2020-08-12,225,144,Beyond Maturity,0.0,25.65,14.25,19.950,19.950,5.100,...,216.8,43.9,19.0,5.2,31.45,1.76,2503.750,0.20,0.00,-2.89


In [None]:
## Set dataframe. Taking average or sum of variables, considering which one makes sense, during their repsective growth stage ##

file_path = './test1/'

# gs here stands for growth stage ##
output_file_path = './gs_data/'
# List all .xlsx files in the directory
excel_files = [f for f in os.listdir(file_path) if f.endswith('.xlsx')]

# Loop through each Excel file
for excel_f in excel_files:
    # Read the Excel file into a DataFrame
    df = pd.read_excel(os.path.join(file_path, excel_f))
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year
    gs_agg = df.groupby(['stage', 'year']).agg({
            'precip': 'sum',
            'tmax': 'mean',
            'tmin': 'mean',
            'Tavg': 'mean',
            'gdd': 'sum',
            'dgdd': 'sum',
            'dtr': 'mean',
            'prdtr': 'mean',
            'pet': 'sum',
            'etr': 'mean',
            'srad': 'mean',
            'rmax': 'mean',
            'rmin': 'mean',
            'vs': 'mean',
            'ravg': 'mean',
            'vpd': 'mean',
            'fdd':'sum',
            'hdd':'sum',
            'spei':'mean',
            'pdsi':'mean',
            'soil':'mean'}).reset_index()
    stage_order = {"Emergence": 0, "Tillering": 1, "Jointing": 2, "Heading": 3, "Flowering": 4, "Grain fill": 5, "Maturity": 6, "Beyond Maturity": 7}
    gs_agg["stage_order"] = gs_agg["stage"].map(stage_order)
    gs_agg = gs_agg.sort_values(by=["stage_order","year"]).drop(columns="stage_order")
    gs_agg = gs_agg.reset_index(drop=True)

    # Calculate the number of days in each stage grouped by 'year' and 'stage'
    days_per_stage_year = df.groupby(['year', 'stage'])['dap'].agg(lambda x: x.max() - x.min()).reset_index()
    days_per_stage_year.rename(columns={'dap': '#days'}, inplace=True)

    # Calculate the total number of wet days in each stage grouped by 'year' and 'stage'
    wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
    wet_per_stage_year.rename(columns={'wet': '#wet'}, inplace=True)

    # Merge the calculated values back into the aggregated dataset
    gs_agg = gs_agg.merge(days_per_stage_year, on=['year', 'stage'], how='left')
    gs_agg = gs_agg.merge(wet_per_stage_year, on=['year', 'stage'], how='left')

    # Calculate wet frequency
    gs_agg['wet_frequency'] = gs_agg['#wet'] / gs_agg['#days']

    # Save the output to an Excel file
    output_file = os.path.join(output_file_path, f'{os.path.splitext(excel_f)[0]}.xlsx')
    gs_agg.to_excel(output_file, index=False)
 

  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  wet_per_stage_year = df.groupby(['year', 'stage'])['wet'].agg(sum).reset_index()
  we

In [76]:
gs_agg

Unnamed: 0,stage,year,precip,tmax,tmin,Tavg,gdd,dgdd,dtr,prdtr,...,rmax,rmin,vs,ravg,vpd,fdd,hdd,spei,pdsi,#days
0,Emergence,2005,40.4,13.578462,2.416154,7.997308,103.965,28.590,11.162308,0.272920,...,83.046154,35.592308,5.546154,59.319231,0.526154,28.55,0.00,-1.496667,-3.253333,12
1,Emergence,2006,0.0,20.897500,6.020000,13.458750,107.670,25.880,14.877500,0.000000,...,61.875000,31.350000,3.675000,46.612500,1.035000,5.45,5.77,-1.570000,-1.270000,7
2,Emergence,2007,31.3,14.910909,3.329091,9.120000,100.320,34.195,11.581818,0.319230,...,87.581818,42.463636,4.636364,65.022727,0.520000,16.88,0.00,-0.810000,-0.575000,10
3,Emergence,2008,9.4,15.178333,2.280000,8.729167,104.750,40.305,12.898333,0.057334,...,74.116667,32.958333,5.175000,53.537500,0.705833,30.32,4.45,-0.566667,-2.273333,11
4,Emergence,2009,0.0,22.858571,7.315714,15.087143,105.610,31.830,15.542857,0.000000,...,60.857143,31.085714,3.414286,45.971429,1.187143,1.16,12.10,-1.010000,1.420000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,Beyond Maturity,2015,2.9,32.596970,18.667576,25.632273,843.220,59.665,13.929394,0.007846,...,38.166667,16.424242,3.612121,27.295455,2.786364,0.40,101.99,-1.504286,-3.165714,32
115,Beyond Maturity,2016,3.7,32.454872,14.759487,23.607179,920.680,66.040,17.695385,0.008314,...,61.961538,20.543590,2.930769,41.252564,2.297692,27.66,126.54,0.472500,-3.442500,38
116,Beyond Maturity,2018,0.0,35.440000,16.903684,26.171842,497.265,29.870,18.536316,0.000000,...,41.131579,12.715789,2.847368,26.923684,3.218421,6.53,104.67,-0.970000,-3.167500,18
117,Beyond Maturity,2019,9.9,32.439655,14.941034,23.690345,687.020,50.535,17.498621,0.028511,...,56.734483,18.896552,3.193103,37.815517,2.371724,22.14,82.76,0.125000,1.608333,28


In [None]:
## Combine all data frames ##
directory = './gs_data'
# List to store dataframes
dataframes = []

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".xlsx"):  # check for Excel files
        filepath = os.path.join(directory, filename)
        
        # Read the Excel file
        df = pd.read_excel(filepath)
        
        location, extension = os.path.splitext(filename)

        # Add a column with the filename (without extension)
        df['location'] = location
        
        # Append dataframe to the list
        dataframes.append(df)

# Concatenate all the dataframes
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df = combined_df.drop(columns = ['#wet'])
combined_df.to_excel('./combined_gs.xlsx', index=False)


In [58]:
combined_df

Unnamed: 0,stage,year,precip,tmax,tmin,Tavg,gdd,dgdd,dtr,prdtr,...,ravg,vpd,fdd,hdd,spei,pdsi,soil,#days,wet_frequency,location
0,Emergence,2005,8.7,17.675455,2.147273,9.911364,109.025,17.860,15.528182,0.053153,...,55.418182,0.781818,28.90,2.32,0.135000,-4.035000,,10,0.500000,Almira
1,Emergence,2006,0.0,17.030000,2.763636,9.896818,108.865,41.200,14.266364,0.000000,...,52.918182,0.757273,23.78,0.66,0.885000,2.320000,,10,0.000000,Almira
2,Emergence,2007,2.0,17.837000,4.115000,10.976000,109.760,26.720,13.722000,0.015775,...,53.960000,0.775000,8.83,0.00,-0.070000,-1.045000,,9,0.222222,Almira
3,Emergence,2008,2.5,15.245000,1.016667,8.130833,97.570,50.445,14.228333,0.017236,...,53.150000,0.709167,46.29,0.00,0.175000,-2.810000,15.1,11,0.181818,Almira
4,Emergence,2009,3.3,16.260000,2.015000,9.137500,109.650,31.135,14.245000,0.026269,...,50.495833,0.815000,37.23,4.71,-0.666667,-2.716667,15.9,11,0.181818,Almira
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1809,Beyond Maturity,2015,2.9,32.596970,18.667576,25.632273,843.220,59.665,13.929394,0.007846,...,27.295455,2.786364,0.40,101.99,-1.504286,-3.165714,12.2,32,0.093750,Walla Walla
1810,Beyond Maturity,2016,3.7,32.454872,14.759487,23.607179,920.680,66.040,17.695385,0.008314,...,41.252564,2.297692,27.66,126.54,0.472500,-3.442500,12.3,38,0.052632,Walla Walla
1811,Beyond Maturity,2018,0.0,35.440000,16.903684,26.171842,497.265,29.870,18.536316,0.000000,...,26.923684,3.218421,6.53,104.67,-0.970000,-3.167500,,18,0.000000,Walla Walla
1812,Beyond Maturity,2019,9.9,32.439655,14.941034,23.690345,687.020,50.535,17.498621,0.028511,...,37.815517,2.371724,22.14,82.76,0.125000,1.608333,14.4,28,0.142857,Walla Walla


In [None]:
### Rehape the data so it will have columns based on growth stage and evironmental variable ###

# Define the order of the stages
stage_order = ["Emergence", "Tillering", "mature", "Jointing", "Heading", "Flowering", "Grain fill", "Maturity", "Beyond Maturity"]  

# Convert 'stage' to a categorical type with the specified order
combined_df["stage"] = pd.Categorical(combined_df["stage"], categories=stage_order, ordered=True)

# Pivot the data
reshaped_df = combined_df.pivot(index=["year", "location"], columns="stage")

# Flatten the multi-level column index
reshaped_df.columns = [f"{col[1]}_{col[0]}" for col in reshaped_df.columns]

# Reset the index for a clean format
reshaped_df.reset_index(inplace=True)

In [60]:
reshaped_df 

Unnamed: 0,year,location,Emergence_precip,Tillering_precip,Jointing_precip,Heading_precip,Flowering_precip,Grain fill_precip,Maturity_precip,Beyond Maturity_precip,...,Maturity_#days,Beyond Maturity_#days,Emergence_wet_frequency,Tillering_wet_frequency,Jointing_wet_frequency,Heading_wet_frequency,Flowering_wet_frequency,Grain fill_wet_frequency,Maturity_wet_frequency,Beyond Maturity_wet_frequency
0,2005,Almira,8.7,53.1,3.9,20.9,4.1,11.4,0.0,0.0,...,4.0,16.0,0.500000,0.391304,0.153846,0.200000,0.111111,0.176471,0.000000,0.000000
1,2005,Bickleton,19.6,42.2,7.3,3.3,0.0,1.9,0.0,,...,3.0,,0.500000,0.500000,0.277778,0.166667,0.000000,0.066667,0.000000,
2,2005,Connell,0.3,30.2,6.5,26.1,7.4,2.0,3.5,3.3,...,5.0,11.0,0.111111,0.400000,0.266667,0.416667,0.166667,0.187500,0.200000,0.181818
3,2005,Dayton,3.6,81.8,29.8,53.4,3.2,15.2,0.0,0.0,...,4.0,9.0,0.181818,0.555556,0.500000,0.500000,0.300000,0.315789,0.000000,0.000000
4,2005,Fairfield,5.9,79.6,21.3,22.1,18.1,0.3,0.0,10.1,...,4.0,14.0,0.500000,0.592593,0.437500,0.346154,0.400000,0.058824,0.000000,0.142857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,2020,Plaza,6.0,83.0,26.4,36.5,0.0,0.0,1.2,0.7,...,6.0,15.0,0.076923,0.517241,0.375000,0.423077,0.000000,0.000000,0.166667,0.066667
225,2020,Pullman,4.2,73.1,25.6,27.9,0.4,0.0,2.0,1.4,...,6.0,10.0,0.181818,0.428571,0.312500,0.346154,0.100000,0.000000,0.166667,0.200000
226,2020,Reardan,7.8,72.3,34.5,28.4,0.0,0.0,1.8,0.0,...,6.0,23.0,0.272727,0.551724,0.466667,0.423077,0.000000,0.000000,0.166667,0.000000
227,2020,St. John,15.4,29.8,62.3,26.7,5.9,0.0,0.0,0.4,...,4.0,11.0,0.411765,0.172414,0.500000,0.333333,0.363636,0.000000,0.000000,0.090909


In [None]:
#################################################################################################
## I want to add precipitation (or any other variable) before each growing season to the data  ##
## I will sum up the precipitation from october until planting date to the data                ##
#################################################################################################

from datetime import datetime

loc_files = './pr'
output_path = './pr_season'
wheat_df = pd.read_excel('sws_spring_wheat.xlsx',sheet_name='Sheet2')
yearly_precip_sums = {}
for file_name in os.listdir(loc_files):
    if file_name.endswith('.xlsx'):
        # Load the datasets
        file_path = os.path.join(loc_files,file_name)
        loc_df = pd.read_excel(file_path)


        # Convert date columns to datetime format
        loc_df['date'] = pd.to_datetime(loc_df['date'])
        wheat_df['planting date'] = pd.to_datetime(wheat_df['planting date'])
        #wheat_df['harvest date'] = pd.to_datetime(wheat_df['harvest date'])

        
        # Initialize an empty DataFrame to store the filtered results
        filtered_df = pd.DataFrame()
        location, extension = os.path.splitext(file_name)
        # Loop through each row of the growth data frame (sws_spring_wheat)
        for index, row in wheat_df.iterrows():
            if row['location'] == location:
                year = row['planting date'].year - 1
                start_date = datetime.strptime(f'10/1/{year}', '%m/%d/%Y')
                
                # Apply the date filtering
                mask = (loc_df['date'] >= start_date) & (loc_df['date'] < row['planting date'])
                filtered_data = loc_df[mask]
               # Calculate the total precipitation for the year
                yearly_precip_sums[year +1] = filtered_data['precip'].sum()
        
        # Convert the dictionary to a DataFrame
        precip_df = pd.DataFrame(list(yearly_precip_sums.items()), columns=['year', 'preseason_precip'])
        
        # Save the DataFrame to the output directory
        output_file_path = os.path.join(output_path, f'{location}.xlsx')
        precip_df.to_excel(output_file_path, index=False)

In [None]:
directory = './pr_season'
# List to store dataframes
dataframes = []

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".xlsx") :  # check for Excel files
        filepath = os.path.join(directory, filename)
        
        # Read the Excel file
        df = pd.read_excel(filepath)
        
        location, extension = os.path.splitext(filename)

        # Add a column with the filename (without extension)
        df['location'] = location
        
        # Append dataframe to the list
        dataframes.append(df)

# Concatenate all the dataframes
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.to_excel('./combined_pr_season.xlsx', index=False)

reshaped_df = reshaped_df.merge(combined_df[['year', 'location', 'preseason_precip']], on=['year', 'location'], how='left')
reshaped_df

In [4]:
#####################################################################################################################
## I want to add soil moisture (or other variables) before each growing season to the data.                        ##
## I will sum up the soil moisture (or other variables) from october until planting date to the data               ##
#####################################################################################################################
from datetime import datetime
import pandas as pd 
import os

loc_files = 'pdsi'
output_path = './pdsi_season'
wheat_df = pd.read_excel('sws_spring_wheat.xlsx',sheet_name='Sheet2')
yearly_pdsi_sums = {}
for file_name in os.listdir(loc_files):
    if file_name.endswith('.xlsx'):
        # Load the datasets
        file_path = os.path.join(loc_files,file_name)
        loc_df = pd.read_excel(file_path)


        # Convert date columns to datetime format
        loc_df['date'] = pd.to_datetime(loc_df['date'])
        wheat_df['planting date'] = pd.to_datetime(wheat_df['planting date'])
        #wheat_df['harvest date'] = pd.to_datetime(wheat_df['harvest date'])

        
        # Initialize an empty DataFrame to store the filtered results
        filtered_df = pd.DataFrame()
        location, extension = os.path.splitext(file_name)
        # Loop through each row of the growth data frame (sws_spring_wheat)
        for index, row in wheat_df.iterrows():
            if row['location'] == location:
                year = row['planting date'].year - 1
                start_date = datetime.strptime(f'10/1/{year}', '%m/%d/%Y')
                
                # Apply the date filtering
                mask = (loc_df['date'] >= start_date) & (loc_df['date'] < row['planting date'])
                filtered_data = loc_df[mask]
               # Calculate the total pdsi for the year
                yearly_pdsi_sums[year +1] = filtered_data['pdsi'].mean()
        
        # Convert the dictionary to a DataFrame
        pdsi_df = pd.DataFrame(list(yearly_pdsi_sums.items()), columns=['year', 'preseason_pdsi'])
        
        # Save the DataFrame to the output directory
        output_file_path = os.path.join(output_path, f'{location}.xlsx')
        pdsi_df.to_excel(output_file_path, index=False)

In [6]:
location

'Walla Walla'

In [74]:
directory = './soil_season'
# List to store dataframes
dataframes = []

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".xlsx") :  # check for Excel files
        filepath = os.path.join(directory, filename)
        
        # Read the Excel file
        df = pd.read_excel(filepath)
        
        location, extension = os.path.splitext(filename)

        # Add a column with the filename (without extension)
        df['location'] = location
        
        # Append dataframe to the list
        dataframes.append(df)

# Concatenate all the dataframes
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.to_excel('./combined_soil_season.xlsx', index=False)
df_pre_soil = df_pre_precip.merge(combined_df[['year', 'location', 'preseason_soil']], on=['year', 'location'], how='left')

In [75]:
df_pre_soil

Unnamed: 0,year,location,Emergence_precip,Tillering_precip,Jointing_precip,Heading_precip,Flowering_precip,Grain fill_precip,Maturity_precip,Beyond Maturity_precip,...,Emergence_wet_frequency,Tillering_wet_frequency,Jointing_wet_frequency,Heading_wet_frequency,Flowering_wet_frequency,Grain fill_wet_frequency,Maturity_wet_frequency,Beyond Maturity_wet_frequency,preseason_precip,preseason_soil
0,2005,Almira,8.7,53.1,3.9,20.9,4.1,11.4,0.0,0.0,...,0.500000,0.391304,0.153846,0.200000,0.111111,0.176471,0.000000,0.000000,119.3,13.371429
1,2005,Bickleton,19.6,42.2,7.3,3.3,0.0,1.9,0.0,,...,0.500000,0.500000,0.277778,0.166667,0.000000,0.066667,0.000000,,173.8,18.257143
2,2005,Connell,0.3,30.2,6.5,26.1,7.4,2.0,3.5,3.3,...,0.111111,0.400000,0.266667,0.416667,0.166667,0.187500,0.200000,0.181818,71.1,6.750000
3,2005,Dayton,3.6,81.8,29.8,53.4,3.2,15.2,0.0,0.0,...,0.181818,0.555556,0.500000,0.500000,0.300000,0.315789,0.000000,0.000000,124.3,34.516667
4,2005,Fairfield,5.9,79.6,21.3,22.1,18.1,0.3,0.0,10.1,...,0.500000,0.592593,0.437500,0.346154,0.400000,0.058824,0.000000,0.142857,267.2,90.785714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,2020,Plaza,6.0,83.0,26.4,36.5,0.0,0.0,1.2,0.7,...,0.076923,0.517241,0.375000,0.423077,0.000000,0.000000,0.166667,0.066667,290.4,90.971429
225,2020,Pullman,4.2,73.1,25.6,27.9,0.4,0.0,2.0,1.4,...,0.181818,0.428571,0.312500,0.346154,0.100000,0.000000,0.166667,0.200000,301.3,117.500000
226,2020,Reardan,7.8,72.3,34.5,28.4,0.0,0.0,1.8,0.0,...,0.272727,0.551724,0.466667,0.423077,0.000000,0.000000,0.166667,0.000000,259.3,78.371429
227,2020,St. John,15.4,29.8,62.3,26.7,5.9,0.0,0.0,0.4,...,0.411765,0.172414,0.500000,0.333333,0.363636,0.000000,0.000000,0.090909,212.2,70.800000


In [76]:
#df1= pd.read_csv('f_data_gs.csv')
directory = './temp_season'
# List to store dataframes
dataframes = []

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".xlsx") :  # check for Excel files
        filepath = os.path.join(directory, filename)
        
        # Read the Excel file
        df = pd.read_excel(filepath)
        
        location, extension = os.path.splitext(filename)

        # Add a column with the filename (without extension)
        df['location'] = location
        
        # Append dataframe to the list
        dataframes.append(df)

# Concatenate all the dataframes
combined_df = pd.concat(dataframes, ignore_index=True)
#combined_df.to_excel('./combined_soil_season.xlsx', index=False)
df_pre_temp = df_pre_soil.merge(combined_df[['year', 'location', 'preseason_temp']], on=['year', 'location'], how='left')
df_pre_temp.to_csv('f_data_gs1.csv')

In [77]:

#df1= pd.read_csv('data_f.csv')
directory = './pdsi_season'
# List to store dataframes
dataframes = []

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".xlsx") :  # check for Excel files
        filepath = os.path.join(directory, filename)
        
        # Read the Excel file
        df = pd.read_excel(filepath)
        
        location, extension = os.path.splitext(filename)

        # Add a column with the filename (without extension)
        df['location'] = location
        
        # Append dataframe to the list
        dataframes.append(df)

# Concatenate all the dataframes
combined_df = pd.concat(dataframes, ignore_index=True)
#combined_df.to_excel('./combined_soil_season.xlsx', index=False)
df_pre_temp = df_pre_temp.merge(combined_df[['year', 'location', 'preseason_pdsi']], on=['year', 'location'], how='left')
df_pre_temp.to_csv('f_data_gs2.csv')