In [None]:
# Import the libraries used throughout the notebook
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
# Load the survey results CSV (path is relative to the working directory) into a pandas DataFrame
df = pd.read_csv("survey_results_public.csv")

In [None]:
# Confirm the data loaded by displaying the first few rows of the dataframe.
# head must be *called*; a bare `df.head` only shows the bound-method repr.
df.head()

In [None]:
# Keep only the columns used in this analysis and rename the compensation
# column to 'Salary' to match how it is used below.
df = df[['Country', 'EdLevel', 'YearsCodePro', 'Employment', 'ConvertedComp']]
df = df.rename(columns={'ConvertedComp': 'Salary'})
# Call head() so the cell displays rows rather than the method object.
df.head()

In [None]:
# Remove rows with no salary, then convert the salary to KSh. using the
# USD -> KES rate from exchangerate-api.com (assumes Salary is in USD - TODO confirm).
# .copy() gives an independent frame so the column assignment below does not
# trigger pandas' SettingWithCopyWarning on a filtered slice.
df = df[df['Salary'].notnull()].copy()

# Read the API key from the environment rather than committing it with the notebook.
api_key = os.environ.get('EXCHANGERATE_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the EXCHANGERATE_API_KEY environment variable before running this cell.'
    )
url = f'https://v6.exchangerate-api.com/v6/{api_key}/latest/USD'

# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP
# failures directly instead of as a confusing KeyError further down.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()
exchange_rate = data['conversion_rates']['KES']

df['Salary'] = df['Salary'] * exchange_rate
df.head()

In [None]:
# Print a summary of the dataframe: columns, non-null counts and dtypes
df.info()

In [None]:
# Discard every row that has at least one missing value
df = df.dropna(axis=0, how='any')
# Show the per-column count of nulls that remain after the cleanup
df.isna().sum()

In [None]:
# Keep only rows whose Employment is exactly 'Employed full-time'; once
# filtered, the Employment column carries no information, so remove it.
is_full_time = df['Employment'].eq('Employed full-time')
df = df.loc[is_full_time].drop(columns='Employment')
df.info()


In [None]:
# Count how many rows each country contributes (value_counts sorts most frequent first)
df['Country'].value_counts()

In [None]:
# Function to remove low count data points
def shorten_categories(categories, cutoff):
    """Build a mapping that folds infrequent categories into 'Other'.

    Parameters
    ----------
    categories : pandas.Series
        Counts indexed by category label, e.g. the result of ``value_counts()``.
    cutoff : int or float
        Minimum count a category needs in order to keep its own label.

    Returns
    -------
    dict
        Maps every label in ``categories.index`` to itself when its count is
        ``>= cutoff``, otherwise to the string ``'Other'``.
    """
    # Iterate (label, count) pairs directly instead of indexing by position.
    return {
        label: (label if count >= cutoff else 'Other')
        for label, count in categories.items()
    }

In [None]:
# Collapse countries with fewer than 400 rows into a single 'Other' bucket
country_map = shorten_categories(df['Country'].value_counts(), 400)
df = df.assign(Country=df['Country'].map(country_map))
# Show the resulting distribution of rows per country
df['Country'].value_counts()

In [None]:
# Box plot of salary per country, with the logo image inset near the top-right corner
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
im = plt.imread('logo.png')
df.boxplot(column='Salary', by='Country', ax=ax)

# Set the figure title and clear the axes title, then label the axes
fig.suptitle('Salary (KSh.) by Country')
ax.set_title('')
ax.set_ylabel('Salary (KSh.)')
ax.set_xlabel('Country')
ax.tick_params(axis='x', labelrotation=90)

# Extra axes in the upper-right region of the figure, used only to show the logo
newax = fig.add_axes([0.8, 0.8, 0.2, 0.2], anchor='NE', zorder=1)
newax.imshow(im)
newax.set_axis_off()
plt.show()