# Analyzing Data for the Dashboard

In [1]:
import json
import pandas as pd

In [2]:
# Load the scraped records from the JSON file on disk

with open('data/dawson_data.json', 'r') as scrapedFile:
    data = json.load(scrapedFile)

In [3]:
# Wrap the parsed JSON in a DataFrame so the statistics below can use pandas
df = pd.DataFrame(data=data)

In [4]:
# Parse the 'date' column into real pandas datetime values
df = df.assign(date=pd.to_datetime(df['date']))

In [5]:
# Total number of different things offered at Dawson = number of rows in the frame
totalOffered = df.shape[0]
totalOffered

76

In [6]:
# Gets amount of given type
def getNumberOfType(wantedType, frame=None):
    """Count the rows whose ``type`` column equals ``wantedType``.

    Parameters
    ----------
    wantedType
        Value compared (with ``==``) against every entry of the ``type`` column.
    frame : pandas.DataFrame, optional
        Frame to count in; it must have a ``type`` column. When omitted, the
        notebook-level ``df`` is used, which keeps existing one-argument calls
        working unchanged.

    Returns
    -------
    int
        Number of matching rows, as a plain Python ``int``.
    """
    source = df if frame is None else frame
    matches = source['type'] == wantedType
    # Summing the boolean mask counts the True rows without building a filtered
    # copy of the frame; int() turns the numpy integer into a built-in int so
    # the result can still be passed to json.dump.
    return int(matches.sum())

In [7]:
# Row counts for each kind of offering, computed in the same order as the labels
(
    numberPrograms,
    numberProfiles,
    numberDisciplines,
    numberSpecial,
    numberGeneral,
) = map(
    getNumberOfType,
    (
        'Program',
        'Profile',
        'Discipline',
        'Special Area of Study',
        'General Education',
    ),
)

In [8]:
# Store the calendar year of each date in its own column. The .dt accessor
# extracts it for the whole column at once, replacing the per-row Python loop.
# Requires 'date' to already hold datetime values (converted in an earlier cell).
df['year'] = df['date'].dt.year

# Number of rows per year, most frequent year first
yearCounts = df['year'].value_counts()
yearCounts

2018    35
2019    30
2014     4
2015     3
2017     2
2016     2
Name: year, dtype: int64

In [9]:
# Quick look at which years most programs were updated in
yearCounts.plot(kind='pie')

<matplotlib.axes._subplots.AxesSubplot at 0x1ae81f8e668>

In [10]:
# Programs sorted by date, most recent first; the helper 'year' column is
# dropped and the index renumbered from 0
newest = (
    df
    .sort_values(by='date', ascending=False)
    .drop(columns='year')
    .reset_index(drop=True)
)
newest.head()

Unnamed: 0,date,programName,type
0,2019-06-21,Nursing,Program
1,2019-06-19,Psychology,Profile
2,2019-06-19,New School,Special Area of Study
3,2019-06-19,English,General Education
4,2019-06-19,Multidisciplinary (Methods & Others),Discipline


## Dumping data into program stats

In [112]:
# Gather every summary value under the label it is stored with in the stats file.
# NOTE(review): to_json() returns a JSON string, so 'Year Counts' and
# 'Programs ordered newest' end up as JSON text nested inside the outer JSON
# document; readers of the file have to parse those two entries a second time.
programStats = dict([
    ('Total', totalOffered),
    ('Number of Programs', numberPrograms),
    ('Number of Profiles', numberProfiles),
    ('Number of Disciplines', numberDisciplines),
    ('Number of Special', numberSpecial),
    ('Number of General', numberGeneral),
    ('Year Counts', yearCounts.to_json()),
    ('Programs ordered newest', newest.to_json()),
])

# Write the collected stats out as a single JSON file
with open('data/dawson_programs_stats.json', 'w') as statsFile:
    json.dump(programStats, statsFile)