In [11]:
import pandas as pd
import numpy as np

# Load the datasets
consumption_df = pd.read_csv('/content/Energy Data - Consumption.csv')
emissions_df = pd.read_csv('/content/Energy Data - Emissions.csv')

# Rename columns to ensure consistency
emissions_df.rename(columns={'MSN': 'EIA_ID'}, inplace=True)
consumption_df.drop(columns=['Energy Category'], inplace=True)

# Remove rows with missing, 'Not Available', or 'Not Meaningful' values in the 'Value' column
consumption_df = consumption_df[~consumption_df['Value'].isna() &
                                (consumption_df['Value'] != 'Not Available') &
                                (consumption_df['Value'] != 'Not Meaningful')]
emissions_df = emissions_df[~emissions_df['Value'].isna() &
                            (emissions_df['Value'] != 'Not Available') &
                            (emissions_df['Value'] != 'Not Meaningful')]

# Handle 'N/A', empty, and NaN values in the 'Sector' column to denote 'Overall'
consumption_df['Sector'] = consumption_df['Sector'].replace({'N/A': 'Overall', '': 'Overall', np.nan: 'Overall'})
emissions_df['Sector'] = emissions_df['Sector'].replace({'N/A': 'Overall', '': 'Overall', np.nan: 'Overall'})

# Since the emissions DataFrame was adjusted to match the consumption DataFrame columns
# Ensure the column order in emissions_df matches that of consumption_df before merging
emissions_df = emissions_df[consumption_df.columns]

# Merge the datasets
final_df = pd.concat([consumption_df, emissions_df], ignore_index=True)

# Save the merged dataset
final_df.to_csv('/content/cleaned_merged_emissions_consumption.csv', index=False)

# Display the first few rows of the merged dataset
print(final_df.head())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  consumption_df['Sector'] = consumption_df['Sector'].replace({'N/A': 'Overall', '': 'Overall', np.nan: 'Overall'})


    EIA_ID  YYYYMM      Date     Value  Column_Order  \
0  DFRCPUS  197301  1/1/1973  1706.943             1   
1  DFRCPUS  197302  2/1/1973  1614.436             1   
2  DFRCPUS  197303  3/1/1973  1140.521             1   
3  DFRCPUS  197304  4/1/1973   824.604             1   
4  DFRCPUS  197305  5/1/1973   676.745             1   

                                         Description  \
0  Distillate Fuel Oil Consumed by the Residentia...   
1  Distillate Fuel Oil Consumed by the Residentia...   
2  Distillate Fuel Oil Consumed by the Residentia...   
3  Distillate Fuel Oil Consumed by the Residentia...   
4  Distillate Fuel Oil Consumed by the Residentia...   

                       Unit          Energy Type Energy Class       Sector  
0  Thousand Barrels per Day  Distillate Fuel Oil    Petroleum  Residential  
1  Thousand Barrels per Day  Distillate Fuel Oil    Petroleum  Residential  
2  Thousand Barrels per Day  Distillate Fuel Oil    Petroleum  Residential  
3  Thousand Barrel