In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from os.path import isfile, join
import geopandas as gpd
import country_converter as coco


In [None]:
def div(a, b):
    """Return ``a / b``, or 0 when the divisor ``b`` equals 0."""
    return a / b if b != 0 else 0

In [None]:
# Condense the per-year patent files into one row of totals per country.

# Columns that are summed, first per year and then across all years.
SUM_COLUMNS = [
    'inventor females',
    'inventor team size',
    'attorney females',
    'attorney team size',
]

data = [str(f) + '_patent_grant_processed.csv' for f in range(2005, 2023)]
print(data)

# Collect the per-year summaries in a list and concatenate once at the end;
# growing a DataFrame with concat inside the loop copies it on every pass.
yearly_frames = []
for file in data:
    path = join('./data', file)
    if not isfile(path):
        # Years without a processed file are simply left out of the totals.
        continue

    # NOTE(review): read_csv parses the literal string 'NA' as missing by
    # default, and groupby drops missing keys. If 'country' holds ISO2 codes
    # (the heatmap merge joins it against iso2_code), Namibia ('NA') would be
    # silently excluded -- confirm against the raw files.
    df = pd.read_csv(path, index_col=0)

    by_country = df.groupby('country')
    yearly_totals = by_country[SUM_COLUMNS].sum()
    # Both results are indexed by country, so the counts are matched by label
    # rather than by row position.
    yearly_totals['number of patents'] = by_country.size()

    year = int(file.split('_')[0])
    yearly_totals['year'] = year

    yearly_frames.append(yearly_totals.reset_index())

if not yearly_frames:
    raise FileNotFoundError(
        "No '<year>_patent_grant_processed.csv' files found in './data'")

# Collapse the per-year rows into a single row per country.
df_total = pd.concat(yearly_frames, ignore_index=True).groupby(
    'country', as_index=False)[SUM_COLUMNS + ['number of patents']].sum()

# Whole-number female percentages per role.
# - div() guards against a zero team size for both roles.
# - Multiplying before rounding avoids float artefacts such as
#   round(0.29, 2) * 100 == 28.999999999999996, which a later int() would
#   truncate to 28.
for role in ('attorney', 'inventor'):
    females = df_total[role + ' females']
    team_size = df_total[role + ' team size']
    df_total[role + ' female percentage'] = [
        round(div(f, t) * 100) for f, t in zip(females, team_size)
    ]

df_total.head()

In [None]:
def topKCountriesWithMostPatents(df: pd.DataFrame, top = 10):
    '''
    Save a bar chart of female percentages for the countries with most patents.

    The `top` rows of `df` with the largest 'number of patents' are plotted
    with one bar each for 'attorney female percentage' and
    'inventor female percentage', every bar labelled with its rounded height.

    Args:
        df: Frame with the columns 'country', 'number of patents',
            'attorney female percentage' and 'inventor female percentage'.
        top: Number of countries to include (default 10).

    Returns:
        None. The chart is written to
        './graphs/bargraphs/Top_<top>_Countries_By_Number_Of_Patents.jpg'.
    '''
    # Local imports: the notebook's import cell only pulls names from os.path.
    import math
    import os

    top_countries = df.sort_values(
        by=['number of patents'], ascending=False).iloc[:top]

    fig, ax = plt.subplots()
    ax.set_title('Top ' + str(top) + ' Countries with Most Patents')

    top_countries.plot.bar(
        x='country',
        y=['attorney female percentage', 'inventor female percentage'],
        rot=40,
        ax=ax)

    for p in ax.patches:
        height = p.get_height()
        if not math.isfinite(height):
            # A NaN/inf bar has nothing meaningful to label.
            continue
        # round(), not int(): int() truncates, so a height stored as
        # 28.999999999999996 would be labelled 28 instead of 29.
        ax.annotate(str(round(height)), (p.get_x(), height))

    ax.set_xlabel('Country')
    ax.set_ylabel('Percentage')
    fig.tight_layout()

    out_dir = './graphs/bargraphs/'
    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig(out_dir + 'Top_' + str(top) +
                '_Countries_By_Number_Of_Patents' + '.jpg', dpi=300)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig)

In [None]:
# Save the bar chart for the aggregated totals, using the default top=10.
topKCountriesWithMostPatents(df_total)

In [None]:
def prepareWorldMap():
    """
    Load the world shapefile and tag each country with an ISO2 code.

    Returns:
        The shapefile's ADMIN, ADM0_A3 and geometry fields, renamed to
        'country', 'country_code' and 'geometry', plus an 'iso2_code' column.
        Antarctica and every country whose name could not be converted to
        an ISO2 code are left out.
    """
    shapefile_path = './worldMap/ne_10m_admin_0_countries.shp'

    # Load only the fields that are needed and give them friendlier names.
    world = gpd.read_file(shapefile_path)[['ADMIN', 'ADM0_A3', 'geometry']]
    world.columns = ['country', 'country_code', 'geometry']

    # Antarctica takes up a lot of map space and is of little use here.
    is_antarctica = world['country'] == 'Antarctica'
    world = world.drop(index=world[is_antarctica].index)

    # Translate the country names into ISO2 codes; names the converter does
    # not recognise come back as the sentinel string 'NULL'.
    country_names = world['country'].to_list()
    world['iso2_code'] = coco.convert(
        names=country_names, to='ISO2', not_found='NULL')

    # Discard the countries for which no code was found.
    unresolved = world['iso2_code'] == 'NULL'
    return world.drop(index=world[unresolved].index)


In [None]:
def getWorldHeatmaps(df: pd.DataFrame, top = 10):
    """
    Save one world choropleth per female-percentage column.

    The `top` rows of `df` with the largest 'number of patents' are joined
    onto the world map through the map's 'iso2_code' and `df`'s 'country'
    column. Countries outside that selection get 0 for every plotted value,
    so they share the colour of a genuine 0 %.

    Args:
        df: Frame with the columns 'country', 'number of patents',
            'inventor females', 'inventor team size', 'attorney females',
            'attorney team size', 'attorney female percentage' and
            'inventor female percentage'.
        top: Number of countries to keep before joining (default 10).

    Returns:
        None. One image per column is written to
        './graphs/heatmaps/Heatmap_<column>.jpg'.
    """
    # Local import: the notebook's import cell only pulls names from os.path.
    import os

    geo_df = prepareWorldMap()

    cols = ['attorney female percentage', 'inventor female percentage']
    value_cols = ['number of patents'] + cols

    top_countries = df.sort_values(
        by=['number of patents'], ascending=False).iloc[:top]

    # Left join keeps every country on the map; unmatched ones get NaN values.
    merged = pd.merge(
        left=geo_df,
        right=top_countries,
        how='left',
        left_on='iso2_code',
        right_on='country')
    merged = merged.drop([
        'country_y', 'inventor females', 'inventor team size',
        'attorney females', 'attorney team size'
    ], axis=1)
    # Fill only the numeric value columns so the geometry column is untouched.
    merged[value_cols] = merged[value_cols].fillna(0)
    merged = merged.sort_values(by=['number of patents'], ascending=False)

    out_dir = './graphs/heatmaps/'
    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    title = 'Top ' + str(top) + ' Countries with Largest Number of Patents'
    cmap = 'viridis'

    for col in cols:
        # The same value range drives the map colours and the colour bar.
        vmin = merged[col].min()
        vmax = merged[col].max()

        fig, ax = plt.subplots(1, figsize=(20, 8))
        ax.axis('off')
        merged.plot(column=col, ax=ax, linewidth=1, cmap=cmap,
                    vmin=vmin, vmax=vmax)

        ax.set_title(title, fontdict={'fontsize': '25', 'fontweight': '3'})

        # Label the figure with the name of the plotted column.
        ax.annotate(col.capitalize(), xy=(0.1, .08),
                    xycoords='figure fraction', horizontalalignment='left',
                    verticalalignment='bottom', fontsize=10)

        # Stand-alone mappable that only exists to feed the colour bar.
        sm = plt.cm.ScalarMappable(
            norm=plt.Normalize(vmin=vmin, vmax=vmax), cmap=cmap)
        sm.set_array([])

        cbaxes = fig.add_axes([0.15, 0.25, 0.01, 0.4])
        fig.colorbar(sm, cax=cbaxes)

        # Save first, then close this specific figure to release its memory.
        fig.savefig(out_dir + 'Heatmap_' + col + '.jpg', dpi=300)
        plt.close(fig)


In [None]:
# Save the world heatmaps for the aggregated totals, using the default top=10.
getWorldHeatmaps(df_total)