In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns

In [3]:
# Annual temperature records per year and region (Tmin / Tmax / Tavg plus
# lat, long and elevation, as shown in the preview of this frame).
# NOTE(review): absolute, machine-specific path -- this cell only runs on a
# machine that has the same G:\ drive layout.
annual_data_csv = r"G:\fresh_start\paper\code_paper\main_data\01_new_data\regional_annual_extreme_data.csv"
physio_annual_df = pd.read_csv(annual_data_csv)

In [4]:
# Preview the loaded annual table; pandas truncates the rendering of long
# frames, so only the first and last rows are shown.
physio_annual_df

Unnamed: 0,year,regions,lat,long,elevation,Tmin,Tmax,Tavg
0,1962,High Mountain,27.683330,87.783330,3119.0,-6.300000,21.000000,7.875205
1,1962,High Mountain,27.816670,86.716670,3450.0,-7.422254,18.146734,6.419072
2,1962,High Mountain,27.816670,86.716670,3700.0,-9.047254,16.521734,4.794072
3,1962,High Mountain,27.833330,86.766670,3857.0,-10.067754,15.501234,3.773572
4,1962,High Mountain,27.961111,86.808889,5200.0,-15.283419,11.761102,-1.605004
...,...,...,...,...,...,...,...,...
1398,2022,Hill,27.645134,85.620881,857.0,2.800000,34.325294,21.936899
1399,2022,Middle Mountain,27.505118,86.586215,2383.0,-1.000000,30.500000,16.285220
1400,2022,Middle Mountain,27.944561,85.595136,2574.0,-4.900000,23.500000,12.626625
1401,2022,Siwalik,26.730538,86.934812,101.0,6.700000,38.700000,25.051994


In [5]:


# Column layout of the per-region trend summary: one slope and one p-value
# for each of Tmax, Tmin and Tavg.
annual_result_columns = [
    "Region",
    "Tmax Slope", "Tmax p-value",
    "Tmin Slope", "Tmin p-value",
    "Tavg Slope", "Tavg p-value",
]

# Empty results table carrying that column layout.
annual_results_slope = pd.DataFrame(columns=annual_result_columns)

# One group of annual records per value of the "regions" column.
grouped_annual_df = physio_annual_df.groupby("regions")

def calculate_slope_pvalue(x, y):
    """Fit ``y = intercept + slope * x`` by OLS and return the slope and its p-value.

    Parameters
    ----------
    x : 1-D array-like of numbers
        Explanatory variable (the callers in this notebook pass the ``year``
        column of a group).
    y : 1-D array-like of numbers
        Response variable, paired with ``x`` by position.

    Returns
    -------
    tuple
        ``(slope, p_value)``: the fitted coefficient of ``x`` and the p-value
        statsmodels reports for it. Both are ``nan`` when no slope can be
        estimated, i.e. when fewer than two distinct ``x`` values remain after
        discarding pairs with a missing value.
    """
    # Pair the observations by position and discard incomplete pairs, so that
    # a single NaN does not turn every fitted parameter into NaN.
    pairs = pd.DataFrame({
        "x": np.asarray(x, dtype=float),
        "y": np.asarray(y, dtype=float),
    }).dropna()

    # With a constant (or empty) x, sm.add_constant's default
    # has_constant="skip" adds no intercept column, so the fit would have a
    # single parameter and `.iloc[1]` would raise IndexError. Report "no
    # trend available" instead of crashing.
    if pairs["x"].nunique() < 2:
        return np.nan, np.nan

    design = sm.add_constant(pairs["x"])  # columns: const, x
    model = sm.OLS(pairs["y"], design).fit()
    slope = model.params.iloc[1]  # position 0 is the intercept
    p_value = model.pvalues.iloc[1]
    return slope, p_value

# Fit a linear trend of Tmax, Tmin and Tavg against year for every region.
# The rows are collected in a plain list and turned into a DataFrame once:
# growing a frame with pd.concat inside the loop, starting from an empty
# frame, is quadratic and raises pandas' FutureWarning about concatenating
# empty entries.
annual_records = []
for region, group in grouped_annual_df:
    year = group["year"]

    tmax_slope, tmax_p = calculate_slope_pvalue(year, group["Tmax"])
    tmin_slope, tmin_p = calculate_slope_pvalue(year, group["Tmin"])
    tavg_slope, tavg_p = calculate_slope_pvalue(year, group["Tavg"])

    annual_records.append({
        "Region": region,
        "Tmax Slope": tmax_slope,
        "Tmax p-value": tmax_p,
        "Tmin Slope": tmin_slope,
        "Tmin p-value": tmin_p,
        "Tavg Slope": tavg_slope,
        "Tavg p-value": tavg_p,
    })

# Passing the columns explicitly keeps the column order (and yields an empty
# frame with the right header when there are no groups).
annual_results_slope = pd.DataFrame(
    annual_records,
    columns=[
        "Region",
        "Tmax Slope", "Tmax p-value",
        "Tmin Slope", "Tmin p-value",
        "Tavg Slope", "Tavg p-value",
    ],
)

# Display the result
print(annual_results_slope)

 

            Region  Tmax Slope  Tmax p-value  Tmin Slope  Tmin p-value  \
0    High Mountain   -0.018342      0.043604    0.000864  9.375820e-01   
1             Hill    0.016938      0.003054   -0.042609  1.262441e-11   
2  Middle Mountain    0.031038      0.000821   -0.046724  6.212480e-07   
3          Siwalik   -0.000486      0.958455   -0.157114  1.979760e-12   
4            Tarai   -0.004438      0.391362   -0.041680  3.240424e-05   

   Tavg Slope  Tavg p-value  
0   -0.002620  7.999019e-01  
1   -0.004516  3.779470e-01  
2    0.004931  3.027553e-01  
3   -0.046310  2.873383e-07  
4   -0.009335  1.500762e-02  


  annual_results_slope = pd.concat([annual_results_slope, temp_df], ignore_index=True)


In [6]:
#annual_results_slope.to_csv(r"G:\fresh_start\paper\code_paper\main_data\01_new_data\annual_extreme_ols_slope_results.csv", index=False)

In [7]:
# Seasonal temperature records: same layout as the annual table plus a
# "season" column.
# NOTE(review): absolute, machine-specific path -- this cell only runs on a
# machine that has the same G:\ drive layout.
seasonal_data_csv = r"G:\fresh_start\paper\code_paper\main_data\01_new_data\regional_seasonal_extreme_data.csv"
physio_seasonal_df = pd.read_csv(seasonal_data_csv)

# Preview the loaded table.
physio_seasonal_df

Unnamed: 0,year,regions,season,lat,long,elevation,Tmin,Tmax,Tavg
0,1962,High Mountain,Monsoon,27.683330,87.783330,3119.0,8.856557,16.422131,12.639344
1,1962,High Mountain,Monsoon,27.816670,86.716670,3450.0,6.807749,15.449855,11.128802
2,1962,High Mountain,Monsoon,27.816670,86.716670,3700.0,5.182749,13.824855,9.503802
3,1962,High Mountain,Monsoon,27.833330,86.766670,3857.0,4.162249,12.804355,8.483302
4,1962,High Mountain,Monsoon,27.961111,86.808889,5200.0,-3.721577,9.657263,2.967843
...,...,...,...,...,...,...,...,...,...
5607,2022,Siwalik,Winter,26.730538,86.934812,101.0,10.935556,25.413947,18.174751
5608,2022,Tarai,Monsoon,26.820440,87.159170,105.0,28.028706,36.322917,32.175812
5609,2022,Tarai,Postmonsoon,26.820440,87.159170,105.0,20.918742,33.357680,27.138211
5610,2022,Tarai,Premonsoon,26.820440,87.159170,105.0,23.699029,35.461666,29.580348


In [8]:


# One group of seasonal records per (region, season) combination.
grouped_physio_seasonal_df = physio_seasonal_df.groupby(["regions", "season"])

# Fit a linear trend of Tmax, Tmin and Tavg against year for every
# (region, season) pair. The rows are collected in a plain list and turned
# into a DataFrame once: growing a frame with pd.concat inside the loop,
# starting from an empty frame, is quadratic and raises pandas' FutureWarning
# about concatenating empty entries.
seasonal_records = []
for (region, season), group in grouped_physio_seasonal_df:
    year = group["year"]

    tmax_slope, tmax_p = calculate_slope_pvalue(year, group["Tmax"])
    tmin_slope, tmin_p = calculate_slope_pvalue(year, group["Tmin"])
    tavg_slope, tavg_p = calculate_slope_pvalue(year, group["Tavg"])

    seasonal_records.append({
        "Region": region,
        "Season": season,
        "Tmax Slope": tmax_slope,
        "Tmax p-value": tmax_p,
        "Tmin Slope": tmin_slope,
        "Tmin p-value": tmin_p,
        "Tavg Slope": tavg_slope,
        "Tavg p-value": tavg_p,
    })

# Passing the columns explicitly keeps the column order (and yields an empty
# frame with the right header when there are no groups).
seasonal_results_slope = pd.DataFrame(
    seasonal_records,
    columns=[
        "Region", "Season",
        "Tmax Slope", "Tmax p-value",
        "Tmin Slope", "Tmin p-value",
        "Tavg Slope", "Tavg p-value",
    ],
)

# Display the results
print(seasonal_results_slope)

  seasonal_results_slope = pd.concat([seasonal_results_slope, temp_df], ignore_index=True)


             Region       Season  Tmax Slope  Tmax p-value  Tmin Slope  \
0     High Mountain      Monsoon   -0.011087  1.671660e-01   -0.015960   
1     High Mountain  Postmonsoon    0.011392  1.412351e-01   -0.001110   
2     High Mountain   Premonsoon    0.018568  1.146133e-02    0.003990   
3     High Mountain       Winter    0.003126  7.076895e-01   -0.017717   
4              Hill      Monsoon   -0.012215  3.381727e-02   -0.012832   
5              Hill  Postmonsoon    0.003181  5.873147e-01    0.001101   
6              Hill   Premonsoon    0.007075  2.332913e-01    0.007384   
7              Hill       Winter   -0.000425  9.422057e-01   -0.020728   
8   Middle Mountain      Monsoon   -0.003780  5.810846e-01   -0.023420   
9   Middle Mountain  Postmonsoon    0.023324  3.541616e-03    0.012823   
10  Middle Mountain   Premonsoon    0.012463  8.449251e-02    0.016356   
11  Middle Mountain       Winter    0.031711  5.830251e-03   -0.008812   
12          Siwalik      Monsoon   -0.

In [9]:
# Persist the seasonal trend summary without the positional index column.
# NOTE(review): absolute, machine-specific output path.
seasonal_results_csv = r"G:\fresh_start\paper\code_paper\main_data\01_new_data\seasonal_extreme_ols_results_slope.csv"
seasonal_results_slope.to_csv(seasonal_results_csv, index=False)