# Net Generation for Biomass Monthly

In [123]:
import pandas as pd

df1 = pd.read_csv("Net_generation_for_biomass_monthly.csv", skiprows = 4)
df1 = df1.rename(columns={"New Jersey : all sectors thousand megawatthours": "biomass_monthly_netGen"})

print(f"NA before:\n{df1.isnull().sum()}")
print(f"\nTotal Rows: {len(df1)}\n")

df1

NA before:
Month                     0
biomass_monthly_netGen    0
dtype: int64

Total Rows: 294



Unnamed: 0,Month,biomass_monthly_netGen
0,Jun 2025,49.27026
1,May 2025,45.41358
2,Apr 2025,51.15349
3,Mar 2025,55.00624
4,Feb 2025,47.63726
...,...,...
289,May 2001,72.94038
290,Apr 2001,66.13678
291,Mar 2001,72.85431
292,Feb 2001,59.18693


# Net Generation for Small-Scale Solar Photovoltaic Monthly

In [124]:
df2 = pd.read_csv("Net_generation_for_small-scale_solar_photovoltaic_monthly.csv", skiprows = 4)
df2 = df2.rename(columns={"New Jersey : all sectors thousand megawatthours": "solar_photovoltaic_monthly_netGen"})

print(f"NA before:\n{df2.isnull().sum()}")
print(f"\nTotal Rows: {len(df2)}\n")

df2 = df2.dropna()

print(f"NA after:\n{df2.isnull().sum()}")
print(f"\nTotal Rows: {len(df2)}\n")

df2

NA before:
Month                                  0
solar_photovoltaic_monthly_netGen    157
dtype: int64

Total Rows: 294

NA after:
Month                                0
solar_photovoltaic_monthly_netGen    0
dtype: int64

Total Rows: 137



Unnamed: 0,Month,solar_photovoltaic_monthly_netGen
0,Jun 2025,423.22241
1,May 2025,374.65729
2,Apr 2025,354.21744
3,Mar 2025,381.72269
4,Feb 2025,243.47374
...,...,...
133,May 2014,140.11466
134,Apr 2014,127.29952
135,Mar 2014,119.95817
136,Feb 2014,92.53721


# Net Generation for Utility-Scale Photovoltaic Monthly

In [125]:
df3 = pd.read_csv("Net_generation_for_utility-scale_photovoltaic_monthly.csv", skiprows = 4)
df3 = df3.rename(columns={"New Jersey : all sectors thousand megawatthours": "utility-scale_photovoltaic_monthly_netGen"})

print(f"NA before:\n{df3.isnull().sum()}")
print(f"\nTotal Rows: {len(df3)}\n")

df3 = df3.dropna()

print(f"NA after:\n{df3.isnull().sum()}")
print(f"\nTotal Rows: {len(df3)}\n")

df3

NA before:
Month                                         0
utility-scale_photovoltaic_monthly_netGen    84
dtype: int64

Total Rows: 294

NA after:
Month                                        0
utility-scale_photovoltaic_monthly_netGen    0
dtype: int64

Total Rows: 210



Unnamed: 0,Month,utility-scale_photovoltaic_monthly_netGen
0,Jun 2025,200.78453
1,May 2025,190.59118
2,Apr 2025,180.23046
3,Mar 2025,159.07881
4,Feb 2025,105.29469
...,...,...
205,May 2008,0.30378
206,Apr 2008,0.29102
207,Mar 2008,0.22964
208,Feb 2008,0.11043


# Net Generation for Wind Monthly

In [126]:
df4 = pd.read_csv("Net_generation_for_wind_monthly.csv", skiprows = 4)
df4 = df4.rename(columns={"all sectors thousand megawatthours": "wind_monthly_netGen"})

print(f"NA before:\n{df4.isnull().sum()}")
print(f"\nTotal Rows: {len(df4)}\n")

df4 = df4.dropna()

print(f"NA after:\n{df4.isnull().sum()}")
print(f"\nTotal Rows: {len(df4)}\n")

df4

NA before:
Month                   0
wind_monthly_netGen    60
dtype: int64

Total Rows: 294

NA after:
Month                  0
wind_monthly_netGen    0
dtype: int64

Total Rows: 234



Unnamed: 0,Month,wind_monthly_netGen
0,Jun 2025,1.06500
1,May 2025,1.52900
2,Apr 2025,2.14900
3,Mar 2025,2.46300
4,Feb 2025,1.92700
...,...,...
229,May 2006,1.73401
230,Apr 2006,1.84243
231,Mar 2006,1.83325
232,Feb 2006,0.00000


# Number of Customer Accounts Monthly

In [127]:
df5 = pd.read_csv("Number_of_customer_accounts_monthly.csv", skiprows = 4)
df5 = df5.rename(columns={"New Jersey : all sectors number of customers": "customerAcc_number_monthly"})

print(f"NA before:\n{df5.isnull().sum()}")
print(f"\nTotal Rows: {len(df5)}\n")

df5

NA before:
Month                         0
customerAcc_number_monthly    0
dtype: int64

Total Rows: 210



Unnamed: 0,Month,customerAcc_number_monthly
0,Jun 2025,4316808
1,May 2025,4306612
2,Apr 2025,4305243
3,Mar 2025,4302023
4,Feb 2025,4301422
...,...,...
205,May 2008,3876322
206,Apr 2008,3886838
207,Mar 2008,3884744
208,Feb 2008,3920859


# Combining Datasets

Each datasets have the same arrangement of monhts, which means we can join it together

In [128]:
# Merging

merged_df = pd.merge(df1, df2, on='Month', how='outer')
merged_df = pd.merge(merged_df, df3, on='Month', how='outer')
merged_df = pd.merge(merged_df, df4, on='Month', how='outer')
merged_df = pd.merge(merged_df, df5, on='Month', how='outer')

# Remove NAs

print(f"NA before:\n{merged_df.isnull().sum()}")
print(f"\nTotal Rows: {len(merged_df)}\n")

merged_df = merged_df.dropna()

print(f"NA after:\n{merged_df.isnull().sum()}")
print(f"\nTotal Rows: {len(merged_df)}\n")

# Download merged dataframe
merged_df.to_csv("merged_df.csv", index=False)

merged_df


NA before:
Month                                          0
biomass_monthly_netGen                         0
solar_photovoltaic_monthly_netGen            157
utility-scale_photovoltaic_monthly_netGen     84
wind_monthly_netGen                           60
customerAcc_number_monthly                    84
dtype: int64

Total Rows: 294

NA after:
Month                                        0
biomass_monthly_netGen                       0
solar_photovoltaic_monthly_netGen            0
utility-scale_photovoltaic_monthly_netGen    0
wind_monthly_netGen                          0
customerAcc_number_monthly                   0
dtype: int64

Total Rows: 137



Unnamed: 0,Month,biomass_monthly_netGen,solar_photovoltaic_monthly_netGen,utility-scale_photovoltaic_monthly_netGen,wind_monthly_netGen,customerAcc_number_monthly
13,Apr 2014,81.47413,127.29952,52.91077,2.55123,3989523.0
14,Apr 2015,69.12619,133.91601,62.51904,2.18189,4004149.0
15,Apr 2016,81.09890,129.25127,81.52152,1.49122,4025298.0
16,Apr 2017,68.29556,155.32410,84.81901,2.27867,4040563.0
17,Apr 2018,82.53115,180.80267,88.86013,2.10318,4119380.0
...,...,...,...,...,...,...
289,Sep 2020,59.81590,218.52883,121.31106,1.25605,4160473.0
290,Sep 2021,59.31268,227.03561,122.18744,1.39200,4207615.0
291,Sep 2022,54.97271,287.46083,141.06542,1.40100,4228304.0
292,Sep 2023,51.20912,268.34143,130.17817,1.58900,4256236.0
