In [1]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
#Import files
energy_csv = "resources/energy_dataset.csv"
weather_csv = "resources/weather_features.csv"
energy_df = pd.read_csv(energy_csv)
weather_df = pd.read_csv(weather_csv)

# Merge DataFrames into one (big_df)
big_df = pd.merge(energy_df, weather_df, on="time")

## Main DataFrame

In [12]:
# Select only necessary columns for a better DataFrame (better_big_df)
better_big_df = pd.DataFrame({
    "Time":big_df["time"],
    "Month":big_df["month"],
    "Day":big_df["day"],
    "Year":big_df["year"],
    "Oil":big_df["generation fossil oil"],
    "Hydro River/Resivoir":big_df["generation hydro run-of-river and poundage"] + big_df["generation hydro water reservoir"],
    "Solar":big_df["generation solar"],
    "Nuclear":big_df["generation nuclear"],
    "City":big_df["city_name"],
    "Temp (F)":round(big_df["temp"]*(9/5)-459.67,2), # converting kelvin to fahrenheit
    "Rain (mm)":big_df["rain_3h"],
    "Snow (mm)":big_df["snow_3h"],
    "Clouds":big_df["clouds_all"],
    "Weather Description":big_df["weather_main"]})
# Preivew summary
better_big_df.count()

Time                    178396
Month                   178396
Day                     178396
Year                    178396
Oil                     178301
Hydro River/Resivoir    178301
Solar                   178306
Nuclear                 178311
City                    178396
Temp (F)                178396
Rain (mm)               178396
Snow (mm)               178396
Clouds                  178396
Weather Description     178396
dtype: int64

### Adding Seasons

In [5]:
# # Bin months into Seasons
# bins = [0,3,6,9,11]
# season = ["winter","sping","summer","fall"]

# # Add Seasons to DataFrame
# better_big_df["Season"] = pd.cut(better_big_df["Month"],bin_dec,labels=season_dec)

# Empty list to add seasons to
seasons = []

# Loop through Month data to assign season name
for month in better_big_df["Month"]:
    if month <= 2 or month == 12:
        seasons.append("Winter")
    if month >= 3 and month <= 5:
        seasons.append("Spring")
    if month >= 6 and month <= 8:
        seasons.append("Summer")
    if month >= 9 and month <= 11:
        seasons.append("Fall")

# Add Seasons to DataFrame
better_big_df["Season"] = seasons

# Preview
better_big_df

Unnamed: 0,Time,Month,Day,Year,Oil,Hydro River/Resivoir,Solar,Nuclear,City,Temp (F),Rain (mm),Snow (mm),Clouds,Weather Description,Season
0,2015-01-01 00:00:00+01:00,1,1,2015,162.0,2950.0,49.0,7096.0,Valencia,27.19,0.0,0.0,0,clear,Winter
1,2015-01-01 00:00:00+01:00,1,1,2015,162.0,2950.0,49.0,7096.0,Madrid,21.51,0.0,0.0,0,clear,Winter
2,2015-01-01 00:00:00+01:00,1,1,2015,162.0,2950.0,49.0,7096.0,Bilbao,25.71,0.0,0.0,0,clear,Winter
3,2015-01-01 00:00:00+01:00,1,1,2015,162.0,2950.0,49.0,7096.0,Barcelona,47.26,0.0,0.0,0,clear,Winter
4,2015-01-01 00:00:00+01:00,1,1,2015,162.0,2950.0,49.0,7096.0,Seville,32.40,0.0,0.0,0,clear,Winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178391,2018-12-31 23:00:00+01:00,12,31,2018,163.0,2755.0,31.0,6075.0,Valencia,42.78,0.0,0.0,0,clear,Winter
178392,2018-12-31 23:00:00+01:00,12,31,2018,163.0,2755.0,31.0,6075.0,Madrid,35.60,0.0,0.0,0,clear,Winter
178393,2018-12-31 23:00:00+01:00,12,31,2018,163.0,2755.0,31.0,6075.0,Bilbao,36.41,0.0,0.0,0,clear,Winter
178394,2018-12-31 23:00:00+01:00,12,31,2018,163.0,2755.0,31.0,6075.0,Barcelona,44.56,0.0,0.0,0,clear,Winter


In [6]:
# Energy generation by year by energy type (divided by 5 to remove duplicate city data)
yearly_view = better_big_df.groupby("Year")["Oil","Hydro River/Resivoir","Solar","Nuclear"].sum()/5

# Energy generation total each year
yearly_view["Year Total"] = yearly_view.sum(axis=1)

yearly_view

Unnamed: 0_level_0,Oil,Hydro River/Resivoir,Solar,Nuclear,Year Total
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015,2906359.0,29953756.4,12795437.8,54975336.6,100630889.8
2016,2550127.8,39257440.0,12471841.6,56851729.4,111131138.8
2017,2631236.4,21228324.2,13327932.8,56694989.4,93882482.8
2018,2544353.0,37214693.8,12344574.8,54849893.0,106953514.6


In [7]:
# Energy generation by type
oil_total = yearly_view["Oil"].sum()
hydro_total = yearly_view["Hydro River/Resivoir"].sum()
solar_total = yearly_view["Solar"].sum()
nuclear_total = yearly_view["Nuclear"].sum()
total_total = yearly_view["Year Total"].sum()

In [8]:
# Energy generation total for each energy type
yearly_view_totals = yearly_view
yearly_view_totals.loc['Total'] = yearly_view.sum(axis=0)

yearly_view_totals

Unnamed: 0_level_0,Oil,Hydro River/Resivoir,Solar,Nuclear,Year Total
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015,2906359.0,29953756.4,12795437.8,54975336.6,100630889.8
2016,2550127.8,39257440.0,12471841.6,56851729.4,111131138.8
2017,2631236.4,21228324.2,13327932.8,56694989.4,93882482.8
2018,2544353.0,37214693.8,12344574.8,54849893.0,106953514.6
Total,10632076.2,127654214.4,50939787.0,223371948.4,412598026.0


In [None]:
seasonal_veiw = better_big_df.groupby("Season")["Oil","Hydro River/Resivoir","Solar","Nuclear"].sum()/5

seasonal_veiw

In [None]:
# Display what one MW is -- exampples: https://www.vivintsolar.com/blog/what-is-a-megawatt-hour
# Display population sizes for each city -- bar chart
# Display total power production by type -- pie chart

## Madrid
Energy Generation vs Weather

In [9]:
# Create Dataframe for Madrid
madrid_big_df = better_big_df.loc[better_big_df["City"]=="Madrid"]

# Preview
madrid_big_df.head()

Unnamed: 0,Time,Month,Day,Year,Oil,Hydro River/Resivoir,Solar,Nuclear,City,Temp (F),Rain (mm),Snow (mm),Clouds,Weather Description,Season
1,2015-01-01 00:00:00+01:00,1,1,2015,162.0,2950.0,49.0,7096.0,Madrid,21.51,0.0,0.0,0,clear,Winter
6,2015-01-01 01:00:00+01:00,1,1,2015,158.0,2667.0,50.0,7096.0,Madrid,21.51,0.0,0.0,0,clear,Winter
11,2015-01-01 02:00:00+01:00,1,1,2015,157.0,2344.0,50.0,7099.0,Madrid,19.46,0.0,0.0,0,clear,Winter
16,2015-01-01 03:00:00+01:00,1,1,2015,160.0,1728.0,50.0,7098.0,Madrid,19.46,0.0,0.0,0,clear,Winter
21,2015-01-01 04:00:00+01:00,1,1,2015,156.0,1673.0,42.0,7097.0,Madrid,19.46,0.0,0.0,0,clear,Winter


## Bilbao
Energy Generation vs Weather

In [None]:
# Create Dataframe for Bilbao
bilbao_big_df = better_big_df.loc[better_big_df["City"]=="Bilbao"]

# Preview
bilbao_big_df.head()

## Barcelona
Energy Generation vs Weather

In [None]:
# Create Dataframe for Barcelona
barcelona_big_df = better_big_df.loc[better_big_df["City"]==" Barcelona"]
barcelona_big_df = barcelona_big_df.rename(columns={" Barcelona":"Barcelona"})

# Preview
barcelona_big_df.head()

## Seville
Energy Generation vs Weather

In [None]:
# Create Dataframe for Seville
seville_big_df = better_big_df.loc[better_big_df["City"]=="Seville"]

# Preivew
seville_big_df.head()

## Valencia
Energy Generation vs Weather

In [None]:
# Create Dataframe for Valencia
valencia_big_df = better_big_df.loc[better_big_df["City"]=="Valencia"]

# Preivew
valencia_big_df.head()