In [50]:
# Select matplotlib's interactive "notebook" backend for every figure drawn in this notebook
%matplotlib notebook

In [30]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress
import numpy as np

# Locations of the two input CSV files (relative to the notebook)
gdp_path = "Resources/GDP_info _clean.csv"
solarpower_path = "Resources/solar_power_per_country.csv"

# Load the raw GDP table and the raw solar power table
gdp_data = pd.read_csv(gdp_path)
solarpower_data = pd.read_csv(solarpower_path)

# GDP table: share the "Country" key with the solar table, tag each year
# column with a "_GDP" suffix, then discard the descriptive metadata columns.
gdp_column_names = {
    "Country Name": "Country",
    "2016": "2016_GDP",
    "2017": "2017_GDP",
    "2018": "2018_GDP",
    "2019": "2019_GDP",
    "2020": "2020_GDP",
}
gdp_df = gdp_data.rename(columns=gdp_column_names).drop(
    columns=["Country Code", "Indicator Name", "Indicator Code"]
)

# Solar table: same "Country" key, "<year>_Total" columns become
# "<year>_Solar", and the per-capita and "<year>_New" columns are discarded.
solar_column_names = {
    "Country or territory": "Country",
    "2016_Total": "2016_Solar",
    "2017_Total": "2017_Solar",
    "2018_Total": "2018_Solar",
    "2019_Total": "2019_Solar",
    "2020_Total": "2020_Solar",
}
solarpower_df = solarpower_data.rename(columns=solar_column_names).drop(
    columns=[
        "W per capita 2019",
        "2016_New",
        "2017_New",
        "2018_New",
        "2019_New",
        "2020_New",
    ]
)

In [32]:
# Combine the GDP and solar tables into a single dataset. The inner join on
# "Country" keeps only countries that appear in both sources.
#
# Then discard every row containing at least one missing value. The defaults
# of dropna() (axis=0, how="any") already do exactly that; spelling out both
# `how` and `thresh` is rejected with a TypeError by newer pandas releases,
# so no arguments are passed.
data_df = pd.merge(gdp_df, solarpower_df, how="inner", on="Country").dropna()

In [33]:
# Preview the first rows of the merged table after rows with missing values were removed
data_df.head()

Unnamed: 0,Country,2016_GDP,2017_GDP,2018_GDP,2019_GDP,2020_GDP,2016_Solar,2017_Solar,2018_Solar,2019_Solar,2020_Solar,Share of total consumption %
2,Australia,47255.30701,48398.5497,50100.22467,52030.64391,52397.38705,5900.0,7200.0,11300,15928,17627,10.7
3,Austria,52684.01711,54172.98679,57059.53894,58641.29812,55648.87456,1077.0,1250.0,1431,1578,2220,3.4
4,Belgium,48597.39998,50442.27054,52623.55967,54918.1662,52626.58164,3422.0,3800.0,4026,4531,5646,6.6
6,Bulgaria,20074.2857,21469.93915,22957.40013,24707.0626,24619.95089,1028.0,1036.0,1036,1065,1073,4.7
10,Canada,46472.34025,48317.09658,50239.99105,50660.57779,48090.99217,2715.0,2900.0,3113,3310,3325,0.7


In [42]:
# Print column dtypes and non-null counts to confirm no missing values remain
data_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 37 entries, 2 to 72
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Country                       37 non-null     object 
 1   2016_GDP                      37 non-null     float64
 2   2017_GDP                      37 non-null     float64
 3   2018_GDP                      37 non-null     float64
 4   2019_GDP                      37 non-null     float64
 5   2020_GDP                      37 non-null     float64
 6   2016_Solar                    37 non-null     float64
 7   2017_Solar                    37 non-null     float64
 8   2018_Solar                    37 non-null     int64  
 9   2019_Solar                    37 non-null     int64  
 10  2020_Solar                    37 non-null     int64  
 11  Share of total consumption %  37 non-null     float64
dtypes: float64(8), int64(3), object(1)
memory usage: 3.8+ KB


In [48]:
# Re-index the merged table by country name (returns a new frame; data_df is unchanged)
df_pie = data_df.set_index("Country")
# Preview the country-indexed frame
df_pie.head()

Unnamed: 0_level_0,2016_GDP,2017_GDP,2018_GDP,2019_GDP,2020_GDP,2016_Solar,2017_Solar,2018_Solar,2019_Solar,2020_Solar,Share of total consumption %
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Australia,47255.30701,48398.5497,50100.22467,52030.64391,52397.38705,5900.0,7200.0,11300,15928,17627,10.7
Austria,52684.01711,54172.98679,57059.53894,58641.29812,55648.87456,1077.0,1250.0,1431,1578,2220,3.4
Belgium,48597.39998,50442.27054,52623.55967,54918.1662,52626.58164,3422.0,3800.0,4026,4531,5646,6.6
Bulgaria,20074.2857,21469.93915,22957.40013,24707.0626,24619.95089,1028.0,1036.0,1036,1065,1073,4.7
Canada,46472.34025,48317.09658,50239.99105,50660.57779,48090.99217,2715.0,2900.0,3113,3310,3325,0.7


In [60]:
# Pie chart of the "Share of total consumption %" column, one wedge per
# country (wedge labels come from the frame's index).
# An explicit figure/axes pair is used so the chart can be titled, and the
# cell ends with plt.show() so the Axes repr is not echoed as cell output.
fig, ax = plt.subplots(figsize=(8, 8))
df_pie.plot(kind="pie", y="Share of total consumption %", legend=False, ax=ax)
ax.set_title("Share of total consumption % by country")
ax.set_ylabel("")  # the title already names the plotted column
plt.show()

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Share of total consumption %'>