## This notebook organizes and converts run results data from the Spark Ontario website

In [2]:
#Uncomment the pip line if pandas not already installed

#!pip install pandas
import pandas as pd
import numpy as np

## 1. Load CSV file

#### Manually type in the file_name to be converted and the output_file_name to be exported.
#### If you want the output file to be exported to a directory other than the current one, uncomment the path line and type in the path.

In [59]:
# Names to fill in by hand before running this cell.
file_name = 'Input the file_name to be converted'
output_file_name = 'Input the output_file_name to be exported'
#path='Uncomment and input path to be exported to'

# Parse the input CSV with pandas' pure-Python parser engine, then preview
# the first rows.
df = pd.read_csv(filepath_or_buffer=file_name, engine='python')
df.head()

Unnamed: 0,placement_name,placement_url,placement_organization,placement_interests,placement_location,placement_time,placement_description,placement_contact,placement_type
0,Recruiting Volunteers for Therapeutic Riding P...,https://www.sparkontario.ca/opp/recruiting-vol...,Pride Stables / Central Ontario Developmental ...,Disability Support\nSports & Physical Activiti...,"WHERE\rKitchener, ON",WHEN\rShort-term,Do you love horses? Love interacting and being...,Pride Stables / Central Ontario Developmental ...,
1,Fundraiser Campaign Volunteer,https://www.sparkontario.ca/opp/fundraiser-cam...,Salangai En Sangeetham&#039;s Fight for Cancer,"Arts, Crafts & Photography\rEvent Planning & P...",,WHEN\rShort-term,Earn more than 40 hours towards your high scho...,Salangai En Sangeetham&#039;s Fight for Cancer...,Virtual Opportunity
2,Virtual Community Outreach Volunteers,https://www.sparkontario.ca/opp/virtual-commun...,Citizens With Disabilities- Ontario,COVID-19 Support\rDisability Support\rWorking ...,,WHEN\rShort-term,CWDO is seeking Virtual Community Outreach vol...,Citizens With Disabilities- Ontario\rcwdo.org\...,Virtual Opportunity
3,Property Management Teams,https://www.sparkontario.ca/opp/property-manag...,The Couchiching Conservancy,Environment & Nature,"WHERE\rOrillia, ON",WHEN\rOngoing,"As with any property, maintenance is required ...",The Couchiching Conservancy\rwww.couchichingco...,
4,Goodwill Ambassador,https://www.sparkontario.ca/opp/goodwill-ambas...,Goodwill Industries of TECNO,Retail & Sales,"WHERE\rNewmarket, ON",WHEN\rOngoing,The volunteer will assist Goodwill by providin...,Goodwill Industries of TECNO\rwww.goodwill.on....,


In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

## 2. Select and rename columns for target file

#### Note: column names in a new format need to be added manually below in order to be recognized

In [60]:
# Target schema, in output order. Each target column is paired with the source
# column names it may appear under in the input file; the first source name
# found in `df` is renamed to the target. An empty source list means the column
# is only picked up when it already exists under its target name.
TARGET_COLUMNS = [
    ('job_title', ['placement_name']),
    ('description', ['placement_description']),
    ('application_process', ['placement_contact']),
    # Not expected in the input: it is filled from `description` in section 3,
    # so it is always selected (see the list comprehension below).
    ('placement_miscellaneous', []),
    ('position_url', ['placement_url']),
    ('position_category', ['placement_interests']),
    ('name', ['placement_organization', 'placement_org']),
    ('position_dateposted', ['placement_date_posted']),
    # Fix: this used to rename 'placement_deadline' to itself, so the
    # 'position_deadline' column was never created or selected.
    ('position_deadline', ['placement_deadline']),
    ('address', ['placement_address']),
    ('postal_code', []),
    ('created_at', []),
    ('updated_at', []),
    ('expiration_date', []),  # default: "2100-07-20"
    ('organization_id', []),
    ('position_phone', []),
]

# Collect every rename first, then apply them in one pass.
rename_map = {}
for target, sources in TARGET_COLUMNS:
    # Equivalent to an if/elif chain: only the first matching source is used.
    source = next((name for name in sources if name in df), None)
    if source is not None:
        rename_map[source] = target
df = df.rename(columns=rename_map)

# Select a target column only when it is actually available, so that
# `df[column_names]` cannot fail on a column the input file did not provide.
# This rule is now applied uniformly, including to job_title and description.
column_names = [
    target
    for target, _sources in TARGET_COLUMNS
    if target in df or target == 'placement_miscellaneous'
]

column_names

['job_title',
 'description',
 'application_process',
 'placement_miscellaneous',
 'position_url',
 'position_category',
 'name']

## 3. Create new dataframe with modified column names

#### The placement_miscellaneous column can be further modified if needed


In [None]:
# placement_miscellaneous starts out as a copy of the description text.
df['placement_miscellaneous'] = df['description']

# Take an explicit copy so the column assignments below modify an independent
# frame rather than a slice of `df` (avoids SettingWithCopyWarning).
df_placements = df[column_names].copy()


def extract_city(location):
    r"""Return the city from a location value such as 'WHERE\rKitchener, ON'.

    The city is the text before the first comma on the second line of the
    value. Non-string values (e.g. NaN for a missing location) and values
    without a second line yield NaN.
    """
    if not isinstance(location, str):
        return np.nan
    # splitlines() accepts '\r', '\n' and '\r\n' as line separators.
    lines = location.splitlines()
    if len(lines) < 2:
        return np.nan
    return lines[1].split(',')[0].strip()


# Fix: the previous loop advanced its row counter only for rows that had a
# location, so every city after the first missing location was written to the
# wrong row. Mapping over the Series keeps each city on its own row.
if 'placement_location' in df:
    df_placements['city'] = df['placement_location'].map(extract_city)
else:
    df_placements['city'] = np.nan

df_placements['province'] = 'ON'
df_placements['position_postedon'] = 'Spark Ontario'
df_placements.head()

### Add skills and categories columns

In [None]:
# Reserve six blank skills slots followed by six blank categories slots.
slot_labels = [
    '{} {}'.format(group, slot)
    for group in ('skills', 'categories')
    for slot in range(1, 7)
]
for label in slot_labels:
    df_placements[label] = np.nan

# Drop the numeric suffix so the placeholder columns share the headers
# 'skills' and 'categories' (each header ends up repeated six times).
df_placements.rename(
    columns={label: label.split(' ')[0] for label in slot_labels},
    inplace=True,
)
df_placements.head()

## 4. Export dataframe as a csv file

#### Choose to export to the current directory or to a target directory

In [64]:
# Write the converted placements to output_file_name, leaving out the row index.
# NOTE(review): escapechar="\r" is an unusual choice, and the commented-out
# alternative below does not pass it — confirm which behavior is intended.
df_placements.to_csv(
    output_file_name,
    escapechar="\r",
    index=False,
)

# Or to export to a target directory, uncomment below:
# import os
# output_file = os.path.join(path, output_file_name)
# df_placements.to_csv(output_file, index=False)