# Energy Web Scraping

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
# Wikipedia page holding the renewable-electricity table scraped below.
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_renewable_electricity_production'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (403, 404, 5xx) immediately instead
# of letting an error page be parsed as if it were the article.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets Beautiful Soup
# pick whichever parser happens to be installed, so the parse tree could
# differ from one environment to another.
soup = BeautifulSoup(page.text, 'html.parser')

# The data of interest is the first <table> on the page.
tables = soup.find_all('table')
if not tables:
    raise IndexError(f'No <table> elements found at {url}')
table = tables[0]

In [3]:
# Every <th> cell in the table supplies one column label; surrounding
# whitespace is trimmed from the cell text.
header_cells = table.find_all('th')
titles = []
for header_cell in header_cells:
    titles.append(header_cell.text.strip())

print(titles)

['Country / dependency', '%ren.', 'Ren.gen.(GWh)', '%hydro', '%wind', '%bio.', '%solar', '%geo.']


# Pandas

In [4]:
import pandas as pd

In [5]:
# Start from an empty frame whose columns are the scraped header labels;
# the rows are filled in by a later cell.
df = pd.DataFrame(data=None, columns=titles)

df

Unnamed: 0,Country / dependency,%ren.,Ren.gen.(GWh),%hydro,%wind,%bio.,%solar,%geo.


In [6]:
# Despite the name, this holds every <tr> (row) element of the table,
# header row included.
column_data = table.find_all(name='tr')

In [7]:
# Gather the cell text of every body row first and build the frame once.
# Appending with df.loc[len(df)] re-allocates on each row and, because it
# mutates the existing frame, duplicates every row when the cell is re-run.
expected_width = len(titles)
records = []
for row in column_data[1:]:  # column_data[0] is the header row
    individual_row_data = [data.text.strip() for data in row.find_all('td')]

    # Rows without any <td> cells (e.g. extra header or separator rows)
    # carry no data and would otherwise break the assignment.
    if not individual_row_data:
        continue

    # A populated row that does not line up with the header means the page
    # layout changed; fail loudly rather than silently misalign columns.
    if len(individual_row_data) != expected_width:
        raise ValueError(
            f'Row has {len(individual_row_data)} cells, expected '
            f'{expected_width}: {individual_row_data}'
        )

    records.append(individual_row_data)

# Rebuilding df from scratch makes this cell idempotent.
df = pd.DataFrame(records, columns=titles)

In [8]:
# Show the populated table; all values are still the raw scraped strings.
df

Unnamed: 0,Country / dependency,%ren.,Ren.gen.(GWh),%hydro,%wind,%bio.,%solar,%geo.
0,Ethiopia,100%,15817,94.5%,5.2%,0.2%,0.1%,0%
1,Bhutan,100%,10823,100%,0%,0%,0%,0%
2,Albania,100%,8963,99.5%,0%,0.5%,0%,0%
3,South Georgia and the South Sandwich Islands,100%,0.8,98.7%,1.3%,0%,0%,0%
4,Nepal,100%,8107,98.7%,0%,1.2%,0.1%,0%
...,...,...,...,...,...,...,...,...
220,Libya,0.02%,8,0%,0%,0.02%,0%,0%
221,Turkmenistan,0.01%,3,0.01%,0%,0%,0%,0%
222,Comoros,0%,0,0%,0%,0%,0%,0%
223,Saint Pierre and Miquelon,0%,0,0%,0%,0%,0%,0%


# Manipulation

In [65]:
# Sort the DataFrame based on the '%solar' column from highest to lowest.
# The scraped values are strings such as '0.1%', so a plain sort would be
# lexicographic ('9.5%' would rank above '10%'). The key converts each value
# to a number for ordering only; the column itself is left untouched.
# astype(str) lets the same key work if the column was already made numeric.
solar_df = df.sort_values(
    by='%solar',
    ascending=False,
    key=lambda col: pd.to_numeric(
        col.astype(str).str.rstrip('%'), errors='coerce'
    ),
)

# Optional export (relative path so the notebook is portable):
# solar_df.to_csv('Solar_max.csv', index=False)

In [68]:
# Remove thousands separators and convert 'Ren.gen.(GWh)' to float.
# Going through astype(str) first makes the cell safe to re-run: once the
# column is already float, the .str accessor alone would raise
# AttributeError on the second execution.
df['Ren.gen.(GWh)'] = (
    df['Ren.gen.(GWh)']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Sort DataFrame by 'Ren.gen.(GWh)' column in descending order
gen_df = df.sort_values(by='Ren.gen.(GWh)', ascending=False)

# Print the sorted DataFrame
print(gen_df)

          Country / dependency  %ren.  Ren.gen.(GWh) %hydro  %wind  %bio.  \
100                      World  28.1%     7983492.00  15.5%   6.5%   3.6%   
98                       China  28.6%     2444538.00  15.7%   7.7%   3.8%   
126              United States  20.3%      886892.00   6.3%   8.8%   3.5%   
37                      Brazil  77.4%      507667.00  55.3%  11.0%   2.6%   
49                      Canada  67.5%      433791.00  59.5%   5.4%   0.9%   
..                         ...    ...            ...    ...    ...    ...   
148                       Niue  13.9%           0.50     0%     0%  13.9%   
217           Saint Barthélemy   0.1%           0.07     0%     0%   0.1%   
222                    Comoros     0%           0.00     0%     0%     0%   
223  Saint Pierre and Miquelon     0%           0.00     0%     0%     0%   
224               Sint Maarten     0%           0.00     0%     0%     0%   

     %solar %geo.  
100     1.9  0.3%  
98      0.9    0%  
126     1.2  0.