This notebook preprocesses the `EV Share Dataset` (the share of new cars that are electric, per country and year) and writes it out as a JSON file in the following format:

```json
{
    "Country": {
        "Year": Share,
        "Year": Share,
        ...
    },
    ...
}
```

For more information about the data, see:
> Hannah Ritchie (2024) - “Tracking global data on electric vehicles” <br>
> Published online at OurWorldinData.org. <br>
> Retrieved from: 'https://ourworldindata.org/electric-car-sales' [Online Resource]

In [None]:
# Enable the autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import pandas as pd

In [3]:
# Location of the raw CSV download; adjust if the file lives elsewhere.
raw_csv = "~/Downloads/electric-car-sales-share/electric-car-sales-share.csv"

# Comma is the default separator for `read_csv`, so it is not spelled out.
electric_car_share = pd.read_csv(raw_csv)
electric_car_share.head()

Unnamed: 0,Entity,Code,Year,Share of new cars that are electric
0,Australia,AUS,2011,0.0061
1,Australia,AUS,2012,0.029
2,Australia,AUS,2013,0.033
3,Australia,AUS,2014,0.15
4,Australia,AUS,2015,0.19


In [4]:
# List every distinct entity in the raw data so the subset below can be chosen from it.
pd.unique(electric_car_share["Entity"])

array(['Australia', 'Austria', 'Belgium', 'Brazil', 'Canada', 'Chile',
       'China', 'Denmark', 'Europe', 'European Union (27)', 'Finland',
       'France', 'Germany', 'Greece', 'Iceland', 'India', 'Israel',
       'Italy', 'Japan', 'Mexico', 'Netherlands', 'New Zealand', 'Norway',
       'Poland', 'Portugal', 'Rest of World', 'South Korea', 'Spain',
       'Sweden', 'Switzerland', 'Turkey', 'United Kingdom',
       'United States', 'World'], dtype=object)

In [6]:
# Entities to keep from the raw data: selected European countries plus the "Europe" entry.
subset = [
    "Austria", "Belgium", "Denmark", "Finland", "France", "Germany",
    "Greece", "Italy", "Netherlands", "Norway", "Portugal", "Poland",
    "Spain", "Sweden", "Switzerland", "United Kingdom", "Europe",
]

# Filter to the selected entities, drop the `Code` column and shorten the column names,
# all in one chain so the raw frame is left untouched.
df_electric = (
    electric_car_share
    .loc[lambda frame: frame["Entity"].isin(subset)]
    .drop(columns=["Code"])
    .rename(columns={"Entity": "Country", "Share of new cars that are electric": "Share"})
)
display(df_electric.head())

Unnamed: 0,Country,Year,Share
14,Austria,2013,0.2
15,Austria,2014,0.57
16,Austria,2015,0.9
17,Austria,2016,1.5
18,Austria,2017,2.0


In [8]:
# Build the nested {country: {year: share}} mapping that is written to disk below.
#
# The columns are iterated directly instead of going through `iterrows()`:
# `iterrows()` packs every row into a single Series and does not preserve the
# per-column dtypes, so the year key could be rendered as "2013.0" instead of
# "2013" whenever a row gets upcast to float.
transformed = {}

for country, year, share in zip(
    df_electric["Country"], df_electric["Year"], df_electric["Share"]
):
    # int() pins the key to a plain year string such as "2013"; it raises if a
    # year is missing rather than emitting a "nan" key.
    transformed.setdefault(country, {})[str(int(year))] = share

# Persist the mapping as pretty-printed JSON.
with open("../data/ev/electric_car_share.json", "w") as f:
    json.dump(transformed, f, indent=2)