## Import Libraries

In [7]:
import pandas as pd

## Read the CSV file into a DataFrame.

In [8]:
# Load the raw education export into a DataFrame.
# The final 5 lines of the file are footer text rather than data, so they are
# dropped via skipfooter (pandas documents skipfooter as unsupported by the
# C parser, hence engine='python').
df = pd.read_csv(
    filepath_or_buffer="Education_Data_Raw.csv",
    engine='python',
    skipfooter=5,
)

## Process Data

In [9]:
# Reshape the raw table (one column per year) into one row per
# country/year with one column per series, then apply the final column
# order and names.

# Rename the year columns to "Value<first token of the header>" so that
# wide_to_long can pick them up through the "Value" stub. The first four
# columns keep their original names.
df.columns = list(df.columns[:4]) + ["Value" + x.split()[0] for x in df.columns[4:]]

# Convert the year columns to numeric; anything unparseable becomes NaN.
yr_cols = df.columns[4:]
df[yr_cols] = df[yr_cols].apply(pd.to_numeric, errors='coerce')

# Fill missing year values with that row's mean over the year columns.
# Only the year columns are filled, so the four leading identifier columns
# can never be overwritten with a number. Rows without any numeric year
# value have a NaN mean and therefore stay NaN.
row_means = df[yr_cols].mean(axis=1)
df[yr_cols] = df[yr_cols].apply(lambda col: col.fillna(row_means))

# Convert dataframe from wide to long format: the "Value<year>" columns
# collapse into a single "Value" column and the suffix becomes "Year".
df = pd.wide_to_long(df, ["Value"], i=["Country Name", df.columns[0]], j="Year")
df = df.reset_index()

# Reshape data from long to wide format: one column per "Series Name".
df = df.pivot(index=["Country Name", "Year"], columns="Series Name", values="Value")
df = df.reset_index()

# Add 'Education_Key' column: a 0-based running row number.
df['Education_Key'] = range(len(df))

# Rearrange columns in df by position.
# NOTE(review): the indices assume exactly 15 columns at this point
# (2 index columns, 12 series columns, 'Education_Key' last) and that the
# series columns arrive in the order matching cols_names below -- confirm
# against the series names present in the data file.
cols = df.columns
cols = cols[[0, 1, 14, 5, 6, 8, 9, 7, 10, 12, 13, 11, 4, 2, 3]]
df = df[cols]

# Rename columns; the names are applied positionally to the reordered frame.
cols_names = [
    "Country",
    "Year",
    "Education_Key",
    "Public_Education_Spending",
    "School_Enrollment_Primary_%Gross",
    "School_Enrollment_Primary_Female_%Gross",
    "School_Enrollment_Primary_Male_%Gross",
    "School_Enrollment_Primary_%Net",
    "School_Enrollment_Secondary_%Gross",
    "School_Enrollment_Secondary_Female_%Gross",
    "School_Enrollment_Secondary_Male_%Gross",
    "School_Enrollment_Secondary_%Net",
    "Primary_Completion_Rate",
    "Primary_Completion_Rate_Female",
    "Primary_Completion_Rate_Male",
]
df.columns = cols_names

## Write data to a CSV file.

In [6]:
# Save the processed table to disk; index=False leaves the DataFrame's row
# index out of the written file.
df.to_csv(
    path_or_buf='Education_Processed_Table.csv',
    index=False,
)