In [None]:
import pandas as pd
import numpy as np
import feather
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
import seaborn as sns
import matplotlib.pyplot as plt
import os
import altair as alt
from pathlib import Path

## Import data 

In [None]:
# Where the prepared data lives and where generated figures are saved.
datadir = "/mnt/PATSTAT/Solar/out/data"
figuredir = "/mnt/PATSTAT/Solar/out/figures"

# Make sure the figure folder (and any missing parent folder) exists; no error if it already does.
Path(figuredir).mkdir(exist_ok=True, parents=True)

In [None]:
# Switch the working directory to the data folder so the dataset can be opened by its bare file name.
os.chdir(datadir)
patents_df = pd.read_feather(path="solar_dataset_all_renewables")

# Quick visual check of the first rows.
patents_df.head(n=5)

In [None]:
# Country used throughout the analysis: the innovator's country (person_ctry_code).
#
# Alternative kept for reference: the application authority. It covers all observations,
# but assigns the authority's country rather than the innovator's country, which mostly
# concerns applications filed with the Chinese authority.
# patents_df["country_code"] = patents_df["appln_auth"]
patents_df["country_code"] = patents_df.person_ctry_code

In [None]:
# Flag solar patents under each classification scheme (1.0 = solar, 0.0 = not solar).
# TODO: do the same for all the other renewables.

# One boolean mask per scheme; a missing class symbol counts as "not solar" (na=False).
cpc_is_solar = patents_df["cpc_class_symbol"].str.startswith("Y02E10/5", na=False)
ipc_is_solar = patents_df["ipc_class_symbol"].str.startswith("H02S", na=False)

patents_df["cpc_solar"] = cpc_is_solar.astype(float)
patents_df["ipc_solar"] = ipc_is_solar.astype(float)
# "mix" = flagged as solar by at least one of the two schemes.
patents_df["mix_solar"] = (ipc_is_solar | cpc_is_solar).astype(float)

## Count patents 

### Absolute count ###

def absolute_count(df, code, countries=None):
    """Build a per-year table of fossil-fuel and solar patent counts.

    "innov" columns count distinct ``docdb_family_id`` values and "app" columns
    count distinct ``appln_id`` values, both grouped by ``earliest_filing_year``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``earliest_filing_year``, ``docdb_family_id``,
        ``appln_id``, ``country_code``, ``fossil_fuel`` and ``<code>_solar``.
    code : str
        Prefix of the solar indicator column: rows with
        ``df[code + "_solar"] == 1`` are counted as solar.
    countries : iterable of str, optional
        Country codes for which per-country columns are produced. When omitted,
        the notebook-level ``list_country`` is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``earliest_filing_year``, with columns in this order:
        ``Nb_ff_energy_innov_per_year``, ``Nb_ff_energy_app_per_year``,
        ``Nb_ff_energy_innov_per_year_<country>`` (one per country),
        ``Nb_ff_energy_innov_per_year_North``,
        ``Nb_solar_energy_innov_per_year``, ``Nb_solar_energy_app_per_year``,
        ``Nb_solar_energy_innov_per_year_<country>``,
        ``Nb_solar_energy_app_per_year_<country>``,
        ``Nb_solar_energy_innov_per_year_North``.

    Notes
    -----
    * Side effect: the notebook-level global ``North`` is set to the country
      list without ``'CN'``.
    * The index covers every year that has at least one fossil-fuel OR solar
      patent. Previously it only contained fossil-fuel years, so solar counts
      for any other year were silently dropped when the columns were aligned.
    * A year/country combination without patents is NaN; the ``_North`` sums
      skip NaN, so they are 0 for years without any matching patent.
    """
    global North  # published at notebook level on purpose: later cells may read it

    if countries is None:
        countries = list_country  # notebook-level default, defined outside this function
    countries = list(countries)

    # "North" = every listed country except China.
    North = [country for country in countries if country != 'CN']

    # Filter once instead of re-filtering the full frame for every country.
    fossil_rows = df[df.fossil_fuel == 1]
    solar_rows = df[df[code + "_solar"] == 1]

    def unique_per_year(rows, id_column, country=None):
        """Number of distinct `id_column` values per filing year, optionally for one country."""
        if country is not None:
            rows = rows[rows.country_code == country]
        return rows.groupby('earliest_filing_year')[id_column].nunique()

    ff_families = unique_per_year(fossil_rows, 'docdb_family_id')
    solar_families = unique_per_year(solar_rows, 'docdb_family_id')

    # Index on the union of both technologies' years so no solar year is lost.
    years = ff_families.index.union(solar_families.index)
    solar_year = pd.DataFrame(index=years)

    ## fossil fuel ##
    solar_year["Nb_ff_energy_innov_per_year"] = ff_families
    solar_year["Nb_ff_energy_app_per_year"] = unique_per_year(fossil_rows, 'appln_id')

    # countries #
    for country in countries:
        solar_year["Nb_ff_energy_innov_per_year_" + country] = unique_per_year(
            fossil_rows, 'docdb_family_id', country
        )

    # "north" countries #
    ff_north_columns = ["Nb_ff_energy_innov_per_year_" + country for country in North]
    solar_year['Nb_ff_energy_innov_per_year_North'] = solar_year[ff_north_columns].sum(axis=1)

    ## Solar panel ##
    solar_year["Nb_solar_energy_innov_per_year"] = solar_families
    solar_year["Nb_solar_energy_app_per_year"] = unique_per_year(solar_rows, 'appln_id')

    # countries, by family #
    for country in countries:
        solar_year["Nb_solar_energy_innov_per_year_" + country] = unique_per_year(
            solar_rows, 'docdb_family_id', country
        )

    # countries, by application #
    for country in countries:
        solar_year["Nb_solar_energy_app_per_year_" + country] = unique_per_year(
            solar_rows, 'appln_id', country
        )

    # "north" countries #
    solar_north_columns = ["Nb_solar_energy_innov_per_year_" + country for country in North]
    solar_year['Nb_solar_energy_innov_per_year_North'] = solar_year[solar_north_columns].sum(axis=1)

    return solar_year