### Import Necessary Libraries

In [1]:
import pandas as pd

### Input CSV file

In [2]:
file = 'PD 2022 Wk 1 Input - Input.csv'
df = pd.read_csv(file)
df

Unnamed: 0,id,pupil first name,pupil last name,gender,Date of Birth,Parental Contact Name_1,Parental Contact Name_2,Preferred Contact Employer,Parental Contact
0,1,Ronna,Nellies,Female,12/21/2013,Purcell,Ketti,Demizz,1
1,2,Rusty,Andriulis,Male,7/21/2012,Vassili,Rivi,Brainbox,1
2,3,Roberta,Oakeshott,Female,12/4/2011,Lind,Haskell,Centidel,2
3,4,Lola,Rubinfajn,Male,6/29/2012,Elie,Tresa,Edgeblab,2
4,5,Kamila,Benedtti,Female,7/10/2012,Adela,Clevey,Trudoo,1
...,...,...,...,...,...,...,...,...,...
995,996,Ninetta,Worling,Female,2/15/2015,Sibbie,Peterus,Omba,2
996,997,Stanford,Tinton,Female,4/9/2013,Kyle,Augustin,Dabshots,1
997,998,Ertha,MacCook,Male,12/14/2013,Sidonia,Kessiah,Topicshots,2
998,999,Lawton,Randles,Female,12/12/2011,Kali,Reeva,Skalith,1


### Form the pupil's name correctly for the records

- in the format 'Last Name, First Name'

In [3]:
df["Pupil's Name"] = df['pupil last name'] + ', ' + df['pupil first name']
df["Pupil's Name"]

0          Nellies, Ronna
1        Andriulis, Rusty
2      Oakeshott, Roberta
3         Rubinfajn, Lola
4        Benedtti, Kamila
              ...        
995      Worling, Ninetta
996      Tinton, Stanford
997        MacCook, Ertha
998       Randles, Lawton
999     Pashbee, Engracia
Name: Pupil's Name, Length: 1000, dtype: object

### Format the parent's name in the same format as the pupil's

1. Split the dataframe according to content of 'Parental Contact'

In [6]:
df_1 = df[df['Parental Contact'] == 1].copy()
df_2 = df[df['Parental Contact'] == 2].copy()

2. Reformat the parent's name, using the student's last name and the correct column 

In [7]:
df_1['Parental Contact Full Name'] = df_1['pupil last name'] + ', ' + df_1['Parental Contact Name_1']
df_2['Parental Contact Full Name'] = df_2['pupil last name'] + ', ' + df_2['Parental Contact Name_2']

### Format the parental email address

We're still using separate dataframes to pull in the correct first name.

In [8]:
df_1['Parental Contact Email Address'] = df_1['Parental Contact Name_1']+ '.'+ df_1['pupil last name']+ df_1['Preferred Contact Employer'] + '.com'
df_2['Parental Contact Email Address'] = df_2['Parental Contact Name_2']+ '.'+ df_2['pupil last name']+ df_2['Preferred Contact Employer'] + '.com'

Rejoin the two separate dataframes

In [9]:
df_full = df_1.append(df_2)
df_full

Unnamed: 0,id,pupil first name,pupil last name,gender,Date of Birth,Parental Contact Name_1,Parental Contact Name_2,Preferred Contact Employer,Parental Contact,Pupil's Name,Parental Contact Full Name,Parental Contact Email Address
0,1,Ronna,Nellies,Female,12/21/2013,Purcell,Ketti,Demizz,1,"Nellies, Ronna","Nellies, Purcell",Purcell.NelliesDemizz.com
1,2,Rusty,Andriulis,Male,7/21/2012,Vassili,Rivi,Brainbox,1,"Andriulis, Rusty","Andriulis, Vassili",Vassili.AndriulisBrainbox.com
4,5,Kamila,Benedtti,Female,7/10/2012,Adela,Clevey,Trudoo,1,"Benedtti, Kamila","Benedtti, Adela",Adela.BenedttiTrudoo.com
5,6,Avery,Colebourn,Female,8/30/2012,Dalenna,Charley,Linktype,1,"Colebourn, Avery","Colebourn, Dalenna",Dalenna.ColebournLinktype.com
8,9,King,Truswell,Female,9/14/2012,Evvy,Othelia,Photospace,1,"Truswell, King","Truswell, Evvy",Evvy.TruswellPhotospace.com
...,...,...,...,...,...,...,...,...,...,...,...,...
987,988,Alane,Beidebeke,Male,11/22/2012,Frans,Leoine,Camimbo,2,"Beidebeke, Alane","Beidebeke, Leoine",Leoine.BeidebekeCamimbo.com
990,991,Gwen,Glowinski,Female,3/4/2012,Hillier,Trenna,Twitterworks,2,"Glowinski, Gwen","Glowinski, Trenna",Trenna.GlowinskiTwitterworks.com
992,993,Ludwig,Raine,Non-binary,2/18/2014,Chicky,Edy,Skilith,2,"Raine, Ludwig","Raine, Edy",Edy.RaineSkilith.com
995,996,Ninetta,Worling,Female,2/15/2015,Sibbie,Peterus,Omba,2,"Worling, Ninetta","Worling, Peterus",Peterus.WorlingOmba.com


### Create the academic year the pupils are in

1. Convert the Date of Birth field to a datetime

In [10]:
df_full["Date of Birth"] = pd.to_datetime(df_full['Date of Birth'])

2. Import datetime module for additional date parsing

In [11]:
import datetime as dt

3. Create a conditional to use with df.apply to calculate academic year

In [12]:
# Create a function to call with the apply method

def academic_year(row):
    if row['Date of Birth'] > dt.datetime(2014, 9, 1):
        return 1
    elif row['Date of Birth'] <= dt.datetime(2014, 9, 1) and row['Date of Birth'] > dt.datetime(2013, 9, 1):
        return 2
    elif row['Date of Birth'] <= dt.datetime(2013, 9, 1) and row['Date of Birth'] > dt.datetime(2012, 9, 1):
        return 3  
    elif row['Date of Birth'] <= dt.datetime(2012, 9, 1) and row['Date of Birth'] > dt.datetime(2011, 9, 1):
        return 4
    elif row['Date of Birth'] <= dt.datetime(2011, 9, 1) and row['Date of Birth'] > dt.datetime(2010, 9, 1):
        return 5 

In [13]:
df_full['Year'] = df_full.apply(academic_year, axis=1)
df_full

Unnamed: 0,id,pupil first name,pupil last name,gender,Date of Birth,Parental Contact Name_1,Parental Contact Name_2,Preferred Contact Employer,Parental Contact,Pupil's Name,Parental Contact Full Name,Parental Contact Email Address,Year
0,1,Ronna,Nellies,Female,2013-12-21,Purcell,Ketti,Demizz,1,"Nellies, Ronna","Nellies, Purcell",Purcell.NelliesDemizz.com,2
1,2,Rusty,Andriulis,Male,2012-07-21,Vassili,Rivi,Brainbox,1,"Andriulis, Rusty","Andriulis, Vassili",Vassili.AndriulisBrainbox.com,4
4,5,Kamila,Benedtti,Female,2012-07-10,Adela,Clevey,Trudoo,1,"Benedtti, Kamila","Benedtti, Adela",Adela.BenedttiTrudoo.com,4
5,6,Avery,Colebourn,Female,2012-08-30,Dalenna,Charley,Linktype,1,"Colebourn, Avery","Colebourn, Dalenna",Dalenna.ColebournLinktype.com,4
8,9,King,Truswell,Female,2012-09-14,Evvy,Othelia,Photospace,1,"Truswell, King","Truswell, Evvy",Evvy.TruswellPhotospace.com,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
987,988,Alane,Beidebeke,Male,2012-11-22,Frans,Leoine,Camimbo,2,"Beidebeke, Alane","Beidebeke, Leoine",Leoine.BeidebekeCamimbo.com,3
990,991,Gwen,Glowinski,Female,2012-03-04,Hillier,Trenna,Twitterworks,2,"Glowinski, Gwen","Glowinski, Trenna",Trenna.GlowinskiTwitterworks.com,4
992,993,Ludwig,Raine,Non-binary,2014-02-18,Chicky,Edy,Skilith,2,"Raine, Ludwig","Raine, Edy",Edy.RaineSkilith.com,2
995,996,Ninetta,Worling,Female,2015-02-15,Sibbie,Peterus,Omba,2,"Worling, Ninetta","Worling, Peterus",Peterus.WorlingOmba.com,1


### Remove extra columns

In [14]:
df_full = df_full[["Year", "Pupil's Name", "Parental Contact Full Name", "Parental Contact Email Address"]]
df_full

Unnamed: 0,Year,Pupil's Name,Parental Contact Full Name,Parental Contact Email Address
0,2,"Nellies, Ronna","Nellies, Purcell",Purcell.NelliesDemizz.com
1,4,"Andriulis, Rusty","Andriulis, Vassili",Vassili.AndriulisBrainbox.com
4,4,"Benedtti, Kamila","Benedtti, Adela",Adela.BenedttiTrudoo.com
5,4,"Colebourn, Avery","Colebourn, Dalenna",Dalenna.ColebournLinktype.com
8,3,"Truswell, King","Truswell, Evvy",Evvy.TruswellPhotospace.com
...,...,...,...,...
987,3,"Beidebeke, Alane","Beidebeke, Leoine",Leoine.BeidebekeCamimbo.com
990,4,"Glowinski, Gwen","Glowinski, Trenna",Trenna.GlowinskiTwitterworks.com
992,2,"Raine, Ludwig","Raine, Edy",Edy.RaineSkilith.com
995,1,"Worling, Ninetta","Worling, Peterus",Peterus.WorlingOmba.com


### Output to CSV

In [15]:
df_full.to_csv('pandas_solution.csv', index=False)