# ETL project

This project, for the energy datasets by states collected from
    * [US Energy Information Administration](https://www.eia.gov/)
    * [Data World](https://data.world/)
does
    * Extract, Transform and Load
    
to PostgreSQL database.

In [1]:
import pandas as pd
import psycopg2
import numpy as np
import os

from dotenv import load_dotenv
   
from sqlalchemy import create_engine

# load up the variables from the .env file.
load_dotenv()

True

### States Abbreviation Table

In [2]:
# File to Load (Remember to Change These)
file_to_load = "Resources/StatetoAbbrev.csv"
states_data = pd.read_csv(file_to_load)
states_data.head()

Unnamed: 0,State,Abbrev,Code
0,Alabama,Ala.,AL
1,Alaska,Alaska,AK
2,Arizona,Ariz.,AZ
3,Arkansas,Ark.,AR
4,California,Calif.,CA


In [3]:
states_data['State_id']=states_data.index+1
states_data.head()

Unnamed: 0,State,Abbrev,Code,State_id
0,Alabama,Ala.,AL,1
1,Alaska,Alaska,AK,2
2,Arizona,Ariz.,AZ,3
3,Arkansas,Ark.,AR,4
4,California,Calif.,CA,5


### Natural Gas Production Table

In [74]:
file_to_load = "Resources/Natural Gas Marketed Production-StateRankings.csv"

natural_gas_production = pd.read_csv(file_to_load)
#natural_gas_production.head()

#### Clean the 'State' column

Some rows have full state name instead of state code.

In [75]:
# Set 'State' as index in State Abb table
state_df = states_data.set_index('State')

# Changing full name to state code
for i in natural_gas_production.index:
    s = natural_gas_production.loc[i,'State']
    if len(s)>2:
        natural_gas_production.loc[i,'State'] = state_df.loc[s,'Code']

#### Add State_id

Merge the production table and State_Abb table to add State_id, and then clean up the fields.

In [77]:
naturalgas_df = pd.merge(states_data, natural_gas_production, how='outer', left_on='Code', right_on='State')

naturalgas_df.drop(columns = ["State_x","State_y","Code","Abbrev","Note: Rankings are based on the full source data values. Excludes Gulf of Mexico federal offshore production."], inplace = True)

#naturalgas_df.head()

In [135]:
# Rename the columns
naturalgas_df.rename(columns = {"Natural Gas Marketed Production, million cu ft":"Total_Gas_million_cu_ft"}, inplace=True)

# Drop the rows with the non-available values
naturalgas_df.dropna(inplace=True)

for i in naturalgas_df.index:
    if naturalgas_df.loc[i, "Total_Gas_million_cu_ft"]=='--':
        naturalgas_df.drop([i], inplace=True)

# Display the table
naturalgas_df.head()

Unnamed: 0,State_id,Rank,Total_Gas_million_cu_ft
0,1,16,139485
1,2,12,341315
2,3,31,46
3,4,11,589973
4,5,14,202616


array(['139485', '341315', '46', '589973', '202616', '1831325', '788',
       '1861', '2418', '5054', '201505', '83973', '2810636', '24',
       '89525', '35564', '0', '43524', '433', '3', '1485142', '11798',
       '705789', '2409153', '2946117', '499', '6210673', '442', '3538',
       '7847102', '296810', '111476', '1799097', '1640264'], dtype=object)

### Energy Production Estimate Table

In [173]:
file_to_load = "Resources/P2.csv"
production_df = pd.read_csv(file_to_load)

production_df.rename(columns ={"Table P2.  Primary Energy Production Estimates in Trillion Btu, 2018 ":"State",
                               "Unnamed: 1":"Fossil Fuels", "Unnamed: 3":"Natural Gas", "Unnamed: 5":"Crude Oil",
                              "Unnamed: 9":"Biofuels", "Unnamed: 7":"Nuclear Electric Power","Unnamed: 11":"Wood and Waste",
                              "Unnamed: 13":"Other", "Unnamed: 15":"Total"}, inplace = True)

production_df.drop(columns = ['Unnamed: 2','Unnamed: 4', 'Unnamed: 6', 'Unnamed: 8','Unnamed: 10', 'Unnamed: 12', 'Unnamed: 14',
                             'Unnamed: 16'], inplace = True)

production_df.drop([0,1,2,3], inplace = True)

In [174]:
production_df['State'].values

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
       'New Jersey', 'New Mexico', 'New York', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming', nan,
       'Federal Offshore - Gulf of Mexico', 'Federal Offshore - Pacific',
       nan, 'United States', nan, nan, 'a  Includes refuse recovery.',
       'b  Marketed production.', 'c  Includes lease condensate.',
       'd  Biomass inputs (feedstock) to the production of b

In [175]:
# Merge the rows only the state names exist : left(state_data) merge
productionfinal_df = pd.merge(states_data,production_df,how='left', left_on='State', right_on='State')

productionfinal_df.drop(columns=["Abbrev", "State", "Code"], inplace = True)

#productionfinal_df.head()

In [176]:
# Rename the column names
productionfinal_df.rename(
    columns={
        'Fossil Fuels' : 'Fossil_Fuels',
        'Natural Gas' : 'Natural_Gas',
        'Crude Oil' : 'Crude_Oil',
        'Nuclear Electric Power' : 'Nuclear_Electricity',
        'Wood and Waste' : 'Wood_Waste'
    }, inplace=True)


productionfinal_df.dropna(inplace=True)
print(productionfinal_df.shape)
productionfinal_df.head()

(51, 9)


Unnamed: 0,State_id,Fossil_Fuels,Natural_Gas,Crude_Oil,Nuclear_Electricity,Biofuels,Wood_Waste,Other,Total
0,1,370.5,149.6,33.6,412.6,1.7,170.1,105.0,1243.0
1,2,13.8,375.3,997.4,0.0,(s),7.3,16.8,1410.6
2,3,140.8,(s),0.1,325.1,6.8,8.7,139.5,621.0
3,4,0.0,600.4,28.6,133.0,9.5,85.2,30.3,887.1
4,5,0.0,228.9,965.3,190.4,35.5,130.5,857.6,2408.2


In [172]:
for i in productionfinal_df.index:
    for cl in productionfinal_df.columns[1:]:
        #print(cl)
        if productionfinal_df.loc[i,cl].replace('.','').isnumeric()==False:
            productionfinal_df.drop([i], inplace=True)
            break
productionfinal_df.head()

Unnamed: 0,State_id,Fossil_Fuels,Natural_Gas,Crude_Oil,Nuclear_Electricity,Biofuels,Wood_Waste,Other,Total
3,4,0.0,600.4,28.6,133.0,9.5,85.2,30.3,887.1
6,7,0.0,0.0,0.0,176.5,2.3,24.3,11.7,214.7
7,8,0.0,0.0,0.0,0.0,0.0,1.4,2.0,3.4
8,9,0.0,0.0,0.0,0.0,0.0,0.9,0.7,1.6
9,10,0.0,1.1,10.5,306.5,1.8,181.9,65.7,567.5


### Renewable Potential energy

In [179]:
# File to Load (Remember to Change These)
file_to_load = "Resources/usretechnicalpotential.csv"

# Read Renewable energy potential file and store into Pandas data frame
renewable_energy_data = pd.read_csv(file_to_load)
renewable_energy_data.columns

Index(['Unnamed: 0', 'urbanUtilityScalePV_GWh', 'urbanUtilityScalePV_GW',
       'urbanUtilityScalePV_km2', 'ruralUtilityScalePV_GWh',
       'ruralUtilityScalePV_GW', 'ruralUtilityScalePV_km2', 'rooftopPV_GWh',
       'rooftopPV_GW', 'CSP_GWh', 'CSP_GW', 'CSP_km2', 'onshoreWind_GWh',
       'onshoreWind_GW', 'onshoreWind_km2', 'offshoreWind_GWh',
       'offshoreWind_GW', 'offshoreWind_km2', 'biopowerSolid_GWh',
       'biopowerSolid_GW', 'biopowerSolid_BDT', 'biopowerGaseous_GWh',
       'biopowerGaseous_GW', 'biopowerGaseous_Tonnes-CH4',
       'geothermalHydrothermal_GWh', 'geothermalHydrothermal_GW',
       'EGSGeothermal_GWh', 'EGSGeothermal_GW', 'hydropower_GWh',
       'hydropower_GW', 'hydropower_countOfSites'],
      dtype='object')

In [180]:
# Rename unnamed column as State
renewable_energy_data.rename(columns={'Unnamed: 0' : 'State'}, inplace=True)

# Select columns
renewable_potential = renewable_energy_data[[
                        'State',
                        'urbanUtilityScalePV_GWh', 'ruralUtilityScalePV_GWh',
                        'rooftopPV_GWh', 'CSP_GWh',
                        'onshoreWind_GWh', 'offshoreWind_GWh',                        
                        'biopowerSolid_GWh', 'biopowerGaseous_GWh',
                        'geothermalHydrothermal_GWh',
                        'EGSGeothermal_GWh',
                        'hydropower_GWh']]
#renewable_potential.head()

In [181]:
# To set State_id, merge this table with State Abb table
combined_df = pd.merge(states_data,renewable_potential,how='outer', on='State')
#combined_df.head()

In [182]:
renewable_df = combined_df.drop(columns=['State', 'Abbrev', 'Code'])
#renewable_df.set_index('State_id', inplace=True)

renewable_df.dropna(inplace=True)
renewable_df["Total"] = renewable_df[].iloc[]
renewable_df.head()

Unnamed: 0,State_id,urbanUtilityScalePV_GWh,ruralUtilityScalePV_GWh,rooftopPV_GWh,CSP_GWh,onshoreWind_GWh,offshoreWind_GWh,biopowerSolid_GWh,biopowerGaseous_GWh,geothermalHydrothermal_GWh,EGSGeothermal_GWh,hydropower_GWh
0,1,35850,3706838,15475.0,0,283,0.0,11193,1533,0,535489.0,4102
4,5,246008,8855917,106411.0,8490916,89862,2662579.0,12408,15510,130921,1344179.0,30023
6,7,7716,19627,6616.0,0,61,26545.0,494,414,0,56078.0,922
7,8,14856,272332,2185.0,0,21,60654.0,512,385,0,22813.0,30
9,10,72787,5137346,63986.0,358,0,34684.0,9664,3693,0,374161.0,682


In [258]:
renewable_df.iloc[:,[1:]].head()

SyntaxError: invalid syntax (<ipython-input-258-ee63f971ba85>, line 1)

### Electricity Production Table

In [203]:
# File to Load (Remember to Change These)
file_to_load = "Resources/Total Net Electricity Generation-StateRankings.csv"

# Read Renewable energy potential file and store into Pandas data frame
total_electricity_data = pd.read_csv(file_to_load)
#total_electricity_data.head()

In [204]:
total_electricity = total_electricity_data.drop(columns='Note: Rankings are based on the full source data values.')
total_electricity.rename(columns={'Total Net Electricity Generation, thousand MWh': 'Total_Net_GWh'}, inplace=True)
#total_electricity.head()

In [205]:
combined_df = pd.merge(states_data,total_electricity,how='outer', left_on='Code', right_on='State')
#combined_df.head()

In [207]:
total_electricity_df = combined_df[['State_id', 'Rank', 'Total_Net_GWh']].copy()
#total_electricity_df.set_index('State_id', inplace=True)
total_electricity_df.dropna(inplace=True)
total_electricity_df.head()

Unnamed: 0,State_id,Rank,Total_Net_GWh
0,1,7,10575
1,2,48,476
2,3,10,9350
3,4,31,3532
4,5,4,15465


### Coal and Crude Oil Produtcion Table

In [187]:
# File to Load (Remember to Change These)
file_to_load = "Resources/Coal Production-StateRankings.csv"
Coal_Production = pd.read_csv(file_to_load)
#Coal_Production.head()

In [188]:
Coal_Production.drop(columns=["Note: Rankings are based on the full source data values."],inplace=True)
#Coal_Production.head()

In [189]:
Coal_combined_df = pd.merge(states_data,Coal_Production,how='inner', left_on='Code', right_on='State')
#Coal_combined_df.head()

In [190]:
Coal_combined_df.drop(columns=["State_x", "State_y", "Abbrev", "Code"],inplace=True)

Coal_combined_df.rename(columns={"Coal Production, thousand short tons":"Total_Coal_thousand_short_tons"}, inplace=True)

Coal_combined_df.dropna(inplace=True)

Coal_combined_df.head()

Unnamed: 0,State_id,Rank,Total_Coal_thousand_short_tons
0,1,10,14783
1,2,20,902
2,3,16,6550
3,6,11,14026
4,14,4,49563


### Crude Oil Production Table

In [191]:
# File to Load (Remember to Change These)
file_to_load = "Resources/Crude Oil Production-StateRankings.csv"
CrudeOil_Production = pd.read_csv(file_to_load)
#CrudeOil_Production.head()

In [192]:
CrudeOil_Production.drop(columns=["Note: Rankings are based on the full source data values. Excludes federal offshore production."],inplace=True)
#CrudeOil_Production.head()

In [193]:
Crude_combined_df = pd.merge(states_data,CrudeOil_Production,how='inner', left_on='Code', right_on='State')
#Crude_combined_df.head()

In [194]:
Crude_combined_df.drop(columns=["State_x", "State_y", "Abbrev", "Code"],inplace=True)

Crude_combined_df.rename(
    columns={"Crude Oil Production, thousand barrels per day":"Total_Oil_thousand_barrels_per_day"}, 
    inplace=True)

Crude_combined_df.dropna(inplace=True)
Crude_combined_df.head()

Unnamed: 0,State_id,Rank,Total_Oil_thousand_barrels_per_day
0,1,20,6
1,2,5,404
2,3,31,0
3,4,18,10
4,5,6,392


### Total Energy Consumed Table

In [195]:
# Load up the CSV file for energy consumption
file_to_load = "Resources/Total Energy Consumed per Capita-StateRankings.csv"

# Read Renewable energy potential file and store into Pandas data frame
Totalenergyconsumed = pd.read_csv(file_to_load)

#Totalenergyconsumed.head()

In [196]:
# drop the last column by only pulling over the first 3
Totalenergyconsumed = Totalenergyconsumed[['Rank', 'State', 'Total Energy Consumed per Capita, million Btu']]

# Do an inner join to make sure we only have complete data and pull in the state_id.  This is done comparing the code field 
# the States data table and the State field from the main table.  This will allow all the tables to have the same access State_id 
# versus either complete state name or the state abbreviation.
Totalenergycon_clean = pd.merge(Totalenergyconsumed, states_data,how='inner',left_on='State', right_on='Code')

# Drop the extra columns from the merge and only keep the state_id.
Totalenergycon_clean = Totalenergycon_clean[['State_id','Rank','Total Energy Consumed per Capita, million Btu']]


In [197]:
# Rename the columns
Totalenergycon_clean.rename(
        columns={"Total Energy Consumed per Capita, million Btu":"Total_Consumed_million_Btu"},
        inplace=True)

Totalenergycon_clean.dropna(inplace=True)

#display the final table.
Totalenergycon_clean.head()

Unnamed: 0,State_id,Rank,Total_Consumed_million_Btu
0,51,1,967
1,19,2,945
2,35,3,872
3,2,4,830
4,16,5,513


### Total Energy Production Table

In [198]:
# Load up the CSV file for energy production
file_to_load = "Resources/Total Energy Production-StateRankings.csv"

# Read Renewable energy potential file and store into Pandas data frame
Totalenergyproduction = pd.read_csv(file_to_load)
#Totalenergyproduction.head()

In [199]:
# drop the last column by only pulling over the first 3
Totalenergyproduction = Totalenergyproduction[['Rank', 'State', 'Total Energy Production, trillion Btu']]

# Do an inner join to make sure we only have complete data and pull in the state_id.  This is done comparing the code field 
# the States data table and the State field from the main table.  This will allow all the tables to have the same access State_id 
# versus either complete state name or the state abbreviation.
Totalenergyprod_clean = pd.merge(Totalenergyproduction, states_data,how='inner',left_on='State', right_on='Code')

# Drop the extra columns from the merge and only keep the state_id.
Totalenergyprod_clean = Totalenergyprod_clean[['State_id','Rank','Total Energy Production, trillion Btu']]

In [200]:
# Rename the columns
Totalenergyprod_clean.rename(
        columns={"Total Energy Production, trillion Btu":"Total_Production_trillion_Btu"},
        inplace=True)

Totalenergyprod_clean.dropna(inplace=True)

# Display the final table.
Totalenergyprod_clean.head()

Unnamed: 0,State_id,Rank,Total_Production_trillion_Btu
0,44,1,20421
1,39,2,8987
2,51,3,7718
3,37,4,4881
4,49,5,4770


## Loading the data into SQL DB

In [113]:
# set the variables for the pull from SQL.  username and password are in an .env file.  You will need those to make this run
# on your machine.
host = 'localhost'
port = 5432
dbname='ETL_Project'
username = os.environ.get('USER_NAME')
pwd = os.environ.get('PASSWORD')

In [114]:
# set up the connection to the database "ETL_Project"
connection_string = f'{username}:{pwd}@{host}:{port}/{dbname}'
engine = create_engine(f'postgresql://{connection_string}')

In [115]:
engine.table_names()

['Total_Energy_Consumed',
 'Total_Energy_Production',
 'Energy_Production_Estimate',
 'Renewable_Energy_Potential',
 'Electricity_Production',
 'State_Abb',
 'NaturalGas_Production',
 'Coal_Production',
 'Crudeoil_Production']

EJ

In [223]:
# Load 'State Abb' table
states_data.to_sql(name='State_Abb', con=engine, if_exists='append', index=False)

In [218]:
# Load 'Renewable_Energy_Potential' table
renewable_df.to_sql(name='Renewable_Energy_Potential', con=engine, if_exists='append', index=False)

In [227]:
# Load 'Total Net Electricity' table
total_electricity_df.to_sql(name='Electricity_Production', con=engine, if_exists='append', index=False)

In [224]:
# Confirm data has been added by querying the State_Abb table
pd.read_sql_query('select "State_id", "State"  from "State_Abb"', con=engine).head()

Unnamed: 0,State_id,State
0,1,Alabama
1,2,Alaska
2,3,Arizona
3,4,Arkansas
4,5,California


In [225]:
# Confirm data has been added by querying the Renewable_Energy_Potential table
pd.read_sql_query('select * from "Renewable_Energy_Potential"', con=engine).head()

Unnamed: 0,State_id,urbanUtilityScalePV_GWh,ruralUtilityScalePV_GWh,rooftopPV_GWh,CSP_GWh,onshoreWind_GWh,offshoreWind_GWh,biopowerSolid_GWh,biopowerGaseous_GWh,geothermalHydrothermal_GWh,EGSGeothermal_GWh,hydropower_GWh
0,1,35850,3706838,15475,0,283,0,11193,1533,0,535489,4102
1,5,246008,8855917,106411,8490916,89862,2662579,12408,15510,130921,1344179,30023
2,7,7716,19627,6616,0,61,26545,494,414,0,56078,922
3,8,14856,272332,2185,0,21,60654,512,385,0,22813,30
4,10,72787,5137346,63986,358,0,34684,9664,3693,0,374161,682


In [228]:
# Confirm data has been added by querying the Electricity_Production table
pd.read_sql_query('select * from "Electricity_Production"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Net_GWh
0,1,7,10575
1,2,48,476
2,3,10,9350
3,4,31,3532
4,5,4,15465


Niral

In [232]:
# Load 'NaturalGas' table
naturalgas_df.to_sql(name='NaturalGas_Production', con=engine, if_exists='append', index=False)

In [230]:
# Load 'Energy_Production_Estimate' table
productionfinal_df.to_sql(name='Energy_Production_Estimate', con=engine, if_exists='append', index=False)

In [231]:
# Confirm data has been added by querying the Electricity_Production table
pd.read_sql_query('select * from "Energy_Production_Estimate"', con=engine).head()

Unnamed: 0,State_id,Fossil_Fuels,Natural_Gas,Crude_Oil,Nuclear_Electricity,Biofuels,Wood_Waste,Other,Total
0,1,370.5,149.6,33.6,412.6,1.7,170.1,105.0,1243.0
1,2,13.8,375.3,997.4,0.0,(s),7.3,16.8,1410.6
2,3,140.8,(s),0.1,325.1,6.8,8.7,139.5,621.0
3,4,0.0,600.4,28.6,133.0,9.5,85.2,30.3,887.1
4,5,0.0,228.9,965.3,190.4,35.5,130.5,857.6,2408.2


In [233]:
# Confirm data has been added by querying the Electricity_Production table
pd.read_sql_query('select * from "NaturalGas_Production"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Gas_million_cu_ft
0,1,16,139485
1,2,12,341315
2,3,31,46
3,4,11,589973
4,5,14,202616


Teshanee

In [220]:
# Load 'Coal_Production' table
Coal_combined_df.to_sql(name='Coal_Production', con=engine, if_exists='append', index=False)

In [236]:
# Load 'Crudeoil_Production' table
Crude_combined_df.to_sql(name='Crudeoil_Production', con=engine, if_exists='append', index=False)

In [234]:
# Confirm data has been added by querying the Coal_Production table
pd.read_sql_query('select *  from "Coal_Production"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Coal_thousand_short_tons
0,1,10,14783
1,2,20,902
2,3,16,6550
3,6,11,14026
4,14,4,49563


In [237]:
# Confirm data has been added by querying the Crudeoil_Production table
pd.read_sql_query('select *  from "Crudeoil_Production"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Oil_thousand_barrels_per_day
0,1,20,6
1,2,5,404
2,3,31,0
3,4,18,10
4,5,6,392


Bill

In [239]:
# Load 'Total_Energy_Production' table
Totalenergyprod_clean.to_sql(name='Total_Energy_Production', con=engine, if_exists='append', index=False)

In [242]:
# Load 'Total_Energy_Consumed' table
Totalenergycon_clean.to_sql(name='Total_Energy_Consumed', con=engine, if_exists='append', index=False)

In [244]:
# Confirm data has been added by querying the Total_Energy_Production table
pd.read_sql_query('select *  from "Total_Energy_Production"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Production_trillion_Btu
0,44,1,20421
1,39,2,8987
2,51,3,7718
3,37,4,4881
4,49,5,4770


In [243]:
# Confirm data has been added by querying the Total_Energy_Consumed table
pd.read_sql_query('select *  from "Total_Energy_Consumed"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Consumed_million_Btu
0,51,1,967
1,19,2,945
2,35,3,872
3,2,4,830
4,16,5,513


In [245]:
pd.read_sql_query('select * from "Total_Energy_Consumed" tec inner join "State_Abb" sa on tec."State_id"=sa."State_id"', con=engine).head()

Unnamed: 0,State_id,Rank,Total_Consumed_million_Btu,State_id.1,State,Abbrev,Code
0,51,1,967,51,Wyoming,Wyo.,WY
1,19,2,945,19,Louisiana,La.,LA
2,35,3,872,35,North Dakota,N.D.,ND
3,2,4,830,2,Alaska,Alaska,AK
4,16,5,513,16,Iowa,Iowa,IA


## Queries

In [252]:
sql_query = f"""
    SELECT st."Code", rep."biopowerSolid_GWh", rep."biopowerGaseous_GWh"
    FROM "Renewable_Energy_Potential"  rep
    INNER JOIN "State_Abb"  st
    ON rep."State_id" = st."State_id"
"""
pd.read_sql_query(sql_query, con=engine).head()

Unnamed: 0,Code,biopowerSolid_GWh,biopowerGaseous_GWh
0,AL,11193,1533
1,CA,12408,15510
2,CT,494,414
3,DE,512,385
4,FL,9664,3693


In [257]:
sql_query = f"""
    SELECT rep."biopowerSolid_GWh", rep."biopowerGaseous_GWh", epe."Biofuels"
    FROM "Renewable_Energy_Potential"  rep
    INNER JOIN "Energy_Production_Estimate" epe
    ON rep."State_id" = epe."State_id"
    WHERE rep."State_id" IN (
        SELECT "State_id"
        FROM "State_Abb"
        WHERE "Code"='NC'
    )
"""
pd.read_sql_query(sql_query, con=engine).head()

Unnamed: 0,biopowerSolid_GWh,biopowerGaseous_GWh,Biofuels
0,12869,3780,0.2


In [None]:
sql_query = f"""
    SELECT tec."Total_Consumed_million_Btu", tep."Total_Production_trillion_Btu"
    FROM "Total_Energy_Consumed"  tec
    INNER JOIN "Total_Energy_Production" tep
    INNER JOIN "Total_Energy_Production" tep
    ON tec."State_id" = tep."State_id"
    WHERE tec."Rank" > 10
"""