In [1]:
import pandas as pd
import numpy as np
from scipy.stats import linregress


In [15]:
og_data = pd.read_csv(r"G:\fresh_start\paper\code_paper\main_data\raw_data\1962_lapse_station_data.csv")

In [16]:
og_data["Tavg"] = (og_data["Tmin"] + og_data["Tmax"])/2

In [None]:
og_data['date'] = pd.to_datetime(og_data["date"]) 
og_data['year'] = og_data["date"].dt.year
og_data['month'] = og_data['date'].dt.month


In [21]:
og_data = og_data.drop(columns=['date', 'station', 'lat', 'long', 'elevation'])

In [23]:
def adjust_year(row):
    if row['month'] == 12:
        return row['year'] + 1
    return row['year']

# Apply the function to create the adjusted year column
og_data['adjusted_year'] = og_data.apply(adjust_year, axis=1)
# Define a function to map months to seasons
def get_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Premonsoon'
    elif month in [6, 7, 8, 9]:
        return 'Monsoon'
    elif month in [10, 11]:
        return 'Postmonsoon'
    else:
        return None


og_data['adjusted_year'] = og_data.apply(adjust_year, axis=1)
og_data['season'] = og_data['month'].apply(get_season)

# Replace 'year' with 'adjusted_year'
og_data = og_data.drop(columns=['year']).rename(columns={'adjusted_year': 'year'})

og_data


Unnamed: 0,regions,Tmin,Tmax,Tavg,month,year,season
0,Tarai,15.432384,27.691517,21.561951,1,1962,Winter
1,High Mountain,-6.310116,5.949017,-0.180549,1,1962,Winter
2,High Mountain,-4.158616,8.100517,1.970951,1,1962,Winter
3,High Mountain,-7.935116,4.324017,-1.805549,1,1962,Winter
4,High Mountain,-8.955616,3.303517,-2.826049,1,1962,Winter
...,...,...,...,...,...,...,...
512534,Hill,7.200000,19.000000,13.100000,12,2023,Winter
512535,Hill,4.000000,17.000000,10.500000,12,2023,Winter
512536,Hill,8.500000,20.000000,14.250000,12,2023,Winter
512537,Hill,6.500000,15.000000,10.750000,12,2023,Winter


In [24]:
# Group by year and season and calculate the mean for Tmin and Tmax
seasonal_df = og_data.groupby([  'year','regions', 'season']).agg({
    'Tmin': 'mean',
    'Tmax': 'mean',
    'Tavg': 'mean'
}).reset_index()
seasonal_df

Unnamed: 0,year,regions,season,Tmin,Tmax,Tavg
0,1962,High Mountain,Monsoon,4.257545,13.631692,8.944618
1,1962,High Mountain,Postmonsoon,-3.492888,10.053328,3.280220
2,1962,High Mountain,Premonsoon,-2.598872,10.744665,4.072897
3,1962,High Mountain,Winter,-7.278599,4.298333,-1.490133
4,1962,Hill,Monsoon,20.102428,28.483092,24.292760
...,...,...,...,...,...,...
1220,2023,High Mountain,Winter,-7.732854,5.998483,-0.867186
1221,2023,Hill,Winter,7.675115,20.276973,13.976044
1222,2023,Middle Mountain,Winter,2.048387,15.029032,8.538710
1223,2023,Siwalik,Winter,11.990323,26.940218,19.465270


In [25]:
s_df = seasonal_df.copy()

In [27]:
annual_values = s_df.groupby(['year', 'regions']).agg({'Tmin': 'mean', 'Tmax': 'mean', 'Tavg': 'mean'}).reset_index()
annual_values['season'] = 'annual'

# Append the new row to the original DataFrame
s_df = pd.concat([s_df, annual_values], ignore_index=True)
s_df

Unnamed: 0,year,regions,season,Tmin,Tmax,Tavg
0,1962,High Mountain,Monsoon,4.257545,13.631692,8.944618
1,1962,High Mountain,Postmonsoon,-3.492888,10.053328,3.280220
2,1962,High Mountain,Premonsoon,-2.598872,10.744665,4.072897
3,1962,High Mountain,Winter,-7.278599,4.298333,-1.490133
4,1962,Hill,Monsoon,20.102428,28.483092,24.292760
...,...,...,...,...,...,...
1530,2023,High Mountain,annual,-7.732854,5.998483,-0.867186
1531,2023,Hill,annual,7.675115,20.276973,13.976044
1532,2023,Middle Mountain,annual,2.048387,15.029032,8.538710
1533,2023,Siwalik,annual,11.990323,26.940218,19.465270


In [28]:
basin_df = s_df.copy()

In [29]:
basin_values = basin_df.groupby(['year', 'season']).agg({'Tmin': 'mean', 'Tmax': 'mean', 'Tavg': 'mean'}).reset_index()
basin_values['regions'] = 'Koshi Basin'

# Append the new row to the original DataFrame
basin_df = pd.concat([basin_df, basin_values], ignore_index=True)
basin_df

Unnamed: 0,year,regions,season,Tmin,Tmax,Tavg
0,1962,High Mountain,Monsoon,4.257545,13.631692,8.944618
1,1962,High Mountain,Postmonsoon,-3.492888,10.053328,3.280220
2,1962,High Mountain,Premonsoon,-2.598872,10.744665,4.072897
3,1962,High Mountain,Winter,-7.278599,4.298333,-1.490133
4,1962,Hill,Monsoon,20.102428,28.483092,24.292760
...,...,...,...,...,...,...
1837,2022,Koshi Basin,Premonsoon,13.933995,25.500496,19.717246
1838,2022,Koshi Basin,Winter,5.804054,17.985678,11.894866
1839,2022,Koshi Basin,annual,12.237830,23.401723,17.819777
1840,2023,Koshi Basin,Winter,6.027439,19.480130,12.753785


In [30]:
df_melted = pd.melt(basin_df, id_vars=['year', 'regions', 'season'], var_name='temperature_type', value_name='temperature')

In [32]:
df_naming = df_melted.copy()
df_naming["naming"] = df_naming['temperature_type'] + ' ' + df_naming['regions'] + ' ' + df_naming['season']
df_naming

Unnamed: 0,year,regions,season,temperature_type,temperature,naming
0,1962,High Mountain,Monsoon,Tmin,4.257545,Tmin High Mountain Monsoon
1,1962,High Mountain,Postmonsoon,Tmin,-3.492888,Tmin High Mountain Postmonsoon
2,1962,High Mountain,Premonsoon,Tmin,-2.598872,Tmin High Mountain Premonsoon
3,1962,High Mountain,Winter,Tmin,-7.278599,Tmin High Mountain Winter
4,1962,Hill,Monsoon,Tmin,20.102428,Tmin Hill Monsoon
...,...,...,...,...,...,...
5521,2022,Koshi Basin,Premonsoon,Tavg,19.717246,Tavg Koshi Basin Premonsoon
5522,2022,Koshi Basin,Winter,Tavg,11.894866,Tavg Koshi Basin Winter
5523,2022,Koshi Basin,annual,Tavg,17.819777,Tavg Koshi Basin annual
5524,2023,Koshi Basin,Winter,Tavg,12.753785,Tavg Koshi Basin Winter


In [33]:
# Select only the required columns
df_result = df_naming[['year', 'naming', 'temperature']]

# Show the resulting DataFrame
print(df_result)

      year                          naming  temperature
0     1962      Tmin High Mountain Monsoon     4.257545
1     1962  Tmin High Mountain Postmonsoon    -3.492888
2     1962   Tmin High Mountain Premonsoon    -2.598872
3     1962       Tmin High Mountain Winter    -7.278599
4     1962               Tmin Hill Monsoon    20.102428
...    ...                             ...          ...
5521  2022     Tavg Koshi Basin Premonsoon    19.717246
5522  2022         Tavg Koshi Basin Winter    11.894866
5523  2022         Tavg Koshi Basin annual    17.819777
5524  2023         Tavg Koshi Basin Winter    12.753785
5525  2023         Tavg Koshi Basin annual    12.753785

[5526 rows x 3 columns]


In [36]:
df_pivot = df_result.pivot(index='naming', columns='year', values='temperature')

df_pivot.to_csv("90_column_data.csv", index=True,)