In [1]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('classic')
import plotly as py
from chart_studio.plotly import plot, iplot
import plotly.graph_objects as go
import numpy as np

In [2]:
# Load the subsidies table from CSV into a DataFrame; the path is relative to
# the notebook's working directory.
subsidies = pd.read_csv("Input/Subsidies_update_2018.csv")

In [3]:
# Keep only the rows whose 'Category' is exactly 'Capacity-enhancing'.
subsidies_cap = subsidies.loc[subsidies['Category'].eq('Capacity-enhancing')]

In [4]:
# Build the per-country input for the quality score:
#   * count rows per (Country, Data Type) pair,
#   * drop the 'not found evidence of subsidy' pairs, since those zero entries
#     do not take part in the distribution of subsidies,
#   * compute the share reported / (modeled + reported).
# The share is later mapped to a score:
#   1 = 0-25% reported/total
#   2 = 26-50% reported/total
#   3 = 51-75% reported/total
#   4 = 76-100% reported/total

subsidies_cap_v2 = subsidies_cap.copy()

# count() tallies non-null values per group; the 'Type' column's tally is
# the one carried forward as the number of entries per pair.
grouped_v2 = (
    subsidies_cap_v2
    .groupby(['Country', 'Data Type'], as_index=False)
    .count()
    .loc[:, ['Country', 'Data Type', 'Type']]
)

type_counts = grouped_v2.loc[
    grouped_v2['Data Type'].ne('not found evidence of subsidy')
]

# One row per country, one column per remaining data type; a country with no
# rows of a given data type gets a count of 0 instead of NaN.
type_count_pivot = (
    type_counts
    .pivot(index='Country', columns='Data Type', values='Type')
    .fillna(0)
)

type_count_pivot['percent'] = type_count_pivot['Reported'].div(
    type_count_pivot['Modeled'] + type_count_pivot['Reported']
)

In [5]:
# Row-wise scorer: converts a row's 'percent' value into a quality score.

def quality_score_percent(row):
    """Map a row's reported share to a 1-4 quality score.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must expose a numeric ``'percent'`` entry.

    Returns
    -------
    int
        1 for values up to 0.25 (this includes zero and anything below it),
        2 for values up to 0.5, 3 for values up to 0.75, 4 for values up
        to 1, and the sentinel 999 when no band matches (values above 1, or
        NaN, which fails every comparison).
    """
    share = row['percent']
    # Bands are tried in ascending order, so the first upper bound that
    # contains the value decides the score.
    for upper_bound, score in ((0.25, 1), (0.5, 2), (0.75, 3), (1, 4)):
        if share <= upper_bound:
            return score
    return 999
# Score each row (one per country) and store the result in a new
# 'quality_score' column.
type_count_pivot['quality_score'] = type_count_pivot.apply(
    quality_score_percent,
    axis='columns',
)


In [6]:
# organizing the data to end up with one quality score by country
type_count_pivot = type_count_pivot.reset_index()
subsidies_quality_scores = type_count_pivot[['Country','quality_score']]
# output of the dataframe into .csv format so it can be used as an input dataframe
subsidies_quality_scores.to_csv("Input/subsidies_quality_scores.csv")