## Reshaping a Pandas Dataframe: Wide-to-Long for 3M data

The input data come from ArcGIS ZonalStats output; the individual CSV files were then concatenated into a single file in wide format.<br>
This script reshapes that table into long format so that it fits Stata-style CSV requirements.

In [1]:
import pandas as pd

In [2]:
# Read the three wide-format input tables (monthly min, mean and max)
min_df, mean_df, max_df = map(
    pd.read_csv,
    [
        "wld_cli_tas_monthly_min_1958_2021_terraclimate_05deg.csv",
        "wld_cli_tas_monthly_mean_1958_2021_terraclimate_05deg.csv",
        "wld_cli_tas_monthly_max_1958_2021_terraclimate_05deg.csv",
    ],
)

In [3]:
# Reshape each wide table to long form: the id columns are kept, and every
# remaining column becomes one row whose header is stored in "date"
min_long = min_df.melt(
    id_vars=["lat", "lon", "orig_id"],
    var_name="date",
    value_name="min",
)
mean_long = mean_df.melt(
    id_vars=["lat", "lon", "orig_id"],
    var_name="date",
    value_name="mean",
)
max_long = max_df.melt(
    id_vars=["lat", "lon", "orig_id"],
    var_name="date",
    value_name="max",
)

In [4]:
# Combine the three long tables into one frame, matching rows on the
# cell identifiers and the date label (inner join, the pandas default)
df = (
    min_long
    .merge(mean_long, how="inner", on=["lat", "lon", "orig_id", "date"])
    .merge(max_long, how="inner", on=["lat", "lon", "orig_id", "date"])
)

In [5]:
# Split the "date" label into its first four characters (year) and the
# next two (month); both new columns remain strings
df["year"] = df["date"].str.slice(stop=4)
df["month"] = df["date"].str.slice(start=4, stop=6)

In [6]:
# "date" has been split into year and month, so remove it
df = df.drop(columns=["date"])

In [7]:
# Put the columns in the output order: identifiers, time, then values
df = df.loc[
    :,
    ["lat", "lon", "orig_id", "year", "month", "min", "mean", "max"],
]

In [8]:
# Write the monthly long-format table to CSV without the row index
df.to_csv(
    path_or_buf="wld_cli_tas_monthly_1958_2021_terraclimate_05deg.csv",
    index=False,
)

In [9]:
# Read the monthly CSV back from disk and display it to verify the export
check1 = pd.read_csv(
    filepath_or_buffer="wld_cli_tas_monthly_1958_2021_terraclimate_05deg.csv"
)
check1

Unnamed: 0,lat,lon,orig_id,year,month,min,mean,max
0,-55.75,-69.75,1,1958,1,8.130,8.713000,9.220
1,-55.75,-69.25,2,1958,1,7.785,8.316296,8.910
2,-55.75,-68.75,3,1958,1,7.830,8.510000,9.015
3,-55.75,-68.25,4,1958,1,7.795,8.607838,9.155
4,-55.75,-67.75,5,1958,1,7.635,8.504035,9.145
...,...,...,...,...,...,...,...,...
53403643,79.25,94.75,70057,2021,12,-25.050,-24.159052,-23.350
53403644,79.75,94.75,70058,2021,12,-25.050,-24.401056,-23.400
53403645,80.25,94.75,70059,2021,12,-26.000,-25.050000,-10.350
53403646,80.75,94.75,70060,2021,12,-26.000,-24.902431,-24.000


In [10]:
# Average the monthly min/mean/max values within each cell and year.
# The aggregation is restricted to the three value columns on purpose:
# "month" is a string column here and is not a grouping key, so a bare
# .mean() would rely on pandas silently dropping it. pandas >= 2.0 no longer
# does that (numeric_only defaults to False), so the unrestricted call fails
# or produces a meaningless "month" aggregate.
df_year = df.groupby(["lat", "lon", "orig_id", "year"])[["min", "mean", "max"]].mean()

In [11]:
# Move the group keys out of the index and back into ordinary columns,
# giving a flat table
df_year = df_year.reset_index(drop=False)

In [12]:
# Write the annual table to CSV without the row index
df_year.to_csv(
    path_or_buf="wld_cli_tas_annual_1958_2021_terraclimate_05deg.csv",
    index=False,
)

In [13]:
# Read the annual CSV back from disk and display it to verify the export
check2 = pd.read_csv(
    filepath_or_buffer="wld_cli_tas_annual_1958_2021_terraclimate_05deg.csv"
)
check2

Unnamed: 0,lat,lon,orig_id,year,min,mean,max
0,-55.75,-69.75,1,1958,4.457917,5.056667,5.627917
1,-55.75,-69.75,1,1959,4.063333,4.661583,5.230833
2,-55.75,-69.75,1,1960,4.957083,5.553500,6.118333
3,-55.75,-69.75,1,1961,4.086667,4.681833,5.243750
4,-55.75,-69.75,1,1962,5.417500,6.015417,6.583333
...,...,...,...,...,...,...,...
4450299,83.25,-24.25,38336,2017,1.183333,1.265766,1.308333
4450300,83.25,-24.25,38336,2018,0.929167,0.988514,1.029167
4450301,83.25,-24.25,38336,2019,1.475000,1.611486,1.750000
4450302,83.25,-24.25,38336,2020,1.016667,1.208221,1.416667
