In [1]:
# Import dependencies
import pandas as pd

In [2]:
# Load the track sample data csv into a DataFrame.
# The path is relative, so the notebook must run from the folder that contains "Resources".
input_data = "Resources/track_sample_data.csv"
track_data = pd.read_csv(filepath_or_buffer=input_data)

In [3]:
# Preview the first row to see how the raw columns are laid out
track_data.head(1)

Unnamed: 0,Date,Product,Student Name,Other,Other 1,Other 2,Other 3,Other 4,Other 5,Other 6,Other 7,Other 8,Other 9,Other 10
0,02/21/2025 11:13:50 AM,CYO Track and Field - Boys and Girls Grades 1s...,"Adams, Riley",Date of Birth:2013-11-08,Grade Level:5,Parent/Guardian Name:Jessica Clark,Parent Phone Number: (361) 889-8429,Parent Email: jessica.clark@mail.com,Emergency Contact (If parent can't be reached)...,Emergency Contact Phone Number: (140) 685-5223,Shirt Size:Youth Medium,Short Size:Youth Medium,I have read the Athletic Agreement:Yes,I have read the Uniform and Equipment Agreemen...


In [4]:
# List the raw column names; the cleaning cells below refer to them by name
track_data.columns

Index(['Date', 'Product', 'Student Name', 'Other', 'Other 1', 'Other 2',
       'Other 3', 'Other 4', 'Other 5', 'Other 6', 'Other 7', 'Other 8',
       'Other 9', 'Other 10'],
      dtype='object')

In [5]:
# Drop the columns that are not wanted in the cleaned output.
# "Other 9" and "Other 10" only hold the "I have read the ... Agreement" answers.
unused_columns = ["Product", "Other 9", "Other 10"]

# Extra columns that appear only in some exports. They are removed when present
# and skipped when absent, so this list no longer has to be edited by hand for
# each file. An export's own "Grade" column has to go before "Other 1" is
# renamed to "Grade", otherwise the frame ends up with two "Grade" columns.
optional_unused_columns = [
    "Order #", "Receipt ID", "User", "Department", "Promo Code", "G/L Account",
    "Student Type", "School", "Grade", "Homeroom", "Billed To", "Payment Method",
    "Status", "ACH Status", "Comment", "Payment on Account", "Batch ID",
    "Batch Date", "Confirmed", "Txn Source", "Sales Tag", "Reference",
]

# Always-expected columns are dropped strictly so a changed export layout still fails loudly.
track_data.drop(columns=unused_columns, inplace=True)
track_data.drop(columns=optional_unused_columns, errors="ignore", inplace=True)
track_data.head(1)

Unnamed: 0,Date,Student Name,Other,Other 1,Other 2,Other 3,Other 4,Other 5,Other 6,Other 7,Other 8
0,02/21/2025 11:13:50 AM,"Adams, Riley",Date of Birth:2013-11-08,Grade Level:5,Parent/Guardian Name:Jessica Clark,Parent Phone Number: (361) 889-8429,Parent Email: jessica.clark@mail.com,Emergency Contact (If parent can't be reached)...,Emergency Contact Phone Number: (140) 685-5223,Shirt Size:Youth Medium,Short Size:Youth Medium


In [6]:
# Give the generic "Other" columns meaningful names.
# The remaining columns are "Other", "Other 1", ..., "Other 8"; the new names
# below are listed in that same order.
descriptive_names = [
    "Date of Birth",
    "Grade",
    "Parent Name",
    "Parent Phone Number",
    "Parent Email",
    "Emergency Contact Name",
    "Emergency Contact Phone Number",
    "Shirt Size",
    "Short Size",
]
new_column_names = {
    ("Other" if position == 0 else f"Other {position}"): name
    for position, name in enumerate(descriptive_names)
}
track_data.rename(columns=new_column_names, inplace=True)
track_data.head(1)

Unnamed: 0,Date,Student Name,Date of Birth,Grade,Parent Name,Parent Phone Number,Parent Email,Emergency Contact Name,Emergency Contact Phone Number,Shirt Size,Short Size
0,02/21/2025 11:13:50 AM,"Adams, Riley",Date of Birth:2013-11-08,Grade Level:5,Parent/Guardian Name:Jessica Clark,Parent Phone Number: (361) 889-8429,Parent Email: jessica.clark@mail.com,Emergency Contact (If parent can't be reached)...,Emergency Contact Phone Number: (140) 685-5223,Shirt Size:Youth Medium,Short Size:Youth Medium


In [7]:
# Split "Last, First" student names into separate last/first name columns.
# n=1 splits on the first comma only, so a name containing a second comma
# still produces exactly two parts instead of breaking the assignment.
student_name_parts = track_data["Student Name"].str.split(",", n=1, expand=True)

# The raw values look like "Adams, Riley", so the part after the comma starts
# with a space; strip both parts so the stored names carry no stray whitespace.
track_data["Student Last Name"] = student_name_parts[0].str.strip()
track_data["Student First Name"] = student_name_parts[1].str.strip()

# Drop original student name column
track_data.drop(columns=["Student Name"], inplace=True)

track_data.head(1)

Unnamed: 0,Date,Date of Birth,Grade,Parent Name,Parent Phone Number,Parent Email,Emergency Contact Name,Emergency Contact Phone Number,Shirt Size,Short Size,Student Last Name,Student First Name
0,02/21/2025 11:13:50 AM,Date of Birth:2013-11-08,Grade Level:5,Parent/Guardian Name:Jessica Clark,Parent Phone Number: (361) 889-8429,Parent Email: jessica.clark@mail.com,Emergency Contact (If parent can't be reached)...,Emergency Contact Phone Number: (140) 685-5223,Shirt Size:Youth Medium,Short Size:Youth Medium,Adams,Riley


In [8]:
# Move the student name columns to list positions 2 and 3 (0-based)
column_list = track_data.columns.tolist()

for position, name in enumerate(["Student First Name", "Student Last Name"], start=2):
    column_list.remove(name)
    column_list.insert(position, name)

# Apply the new order. The explicit .copy() makes track_data an independent
# frame, so the column assignments in later cells do not trigger pandas'
# SettingWithCopyWarning (raised when writing to a frame derived by selection).
track_data = track_data[column_list].copy()

track_data.head(1)

Unnamed: 0,Date,Date of Birth,Student First Name,Student Last Name,Grade,Parent Name,Parent Phone Number,Parent Email,Emergency Contact Name,Emergency Contact Phone Number,Shirt Size,Short Size
0,02/21/2025 11:13:50 AM,Date of Birth:2013-11-08,Riley,Adams,Grade Level:5,Parent/Guardian Name:Jessica Clark,Parent Phone Number: (361) 889-8429,Parent Email: jessica.clark@mail.com,Emergency Contact (If parent can't be reached)...,Emergency Contact Phone Number: (140) 685-5223,Shirt Size:Youth Medium,Short Size:Youth Medium


In [9]:
# Every value still carries the form label it was exported with
# (e.g. "Grade Level:5"). Map each column to the label text to remove.
field_labels = {
    "Date of Birth": "Date of Birth:",
    "Grade": "Grade Level:",
    "Parent Name": "Parent/Guardian Name:",
    "Parent Phone Number": "Parent Phone Number:",
    "Parent Email": "Parent Email:",
    "Emergency Contact Name": "Emergency Contact (If parent can't be reached):",
    "Emergency Contact Phone Number": "Emergency Contact Phone Number:",
    "Shirt Size": "Shirt Size:",
    "Short Size": "Short Size:",
}

for column, label in field_labels.items():
    track_data[column] = (
        track_data[column]
        # regex=False: the labels contain "(" and ")" and must be matched literally
        .str.replace(label, "", regex=False)
        # Some labels are followed by a space ("Parent Email: jessica..."), which
        # would otherwise be left at the start of the cleaned value
        .str.strip()
    )

track_data.head(5)

Unnamed: 0,Date,Date of Birth,Student First Name,Student Last Name,Grade,Parent Name,Parent Phone Number,Parent Email,Emergency Contact Name,Emergency Contact Phone Number,Shirt Size,Short Size
0,02/21/2025 11:13:50 AM,2013-11-08,Riley,Adams,5,Jessica Clark,(361) 889-8429,jessica.clark@mail.com,Donald Martinez,(140) 685-5223,Youth Medium,Youth Medium
1,02/21/2025 11:15:00 AM,2016-03-04,Jordan,Bailey,3,Patricia White,(911) 500-9414,patricia.white@mail.com,Sarah Jackson,(909) 202-9067,Adult Medium,Adult Medium
2,02/24/2025 10:50:27 AM,2011-07-06,Avery,Bennett,7,George Gonzalez,(990) 955-3049,george.gonzalez@example.com,Barbara Anderson,(836) 720-2014,Youth Large,Youth Large
3,02/25/2025 09:39:49 AM,2015-11-25,Taylor,Brooks,3,Mary Garcia,(668) 743-3891,mary.garcia@example.com,Daniel Hernandez,(729) 258-3766,Youth Medium,Youth Medium
4,02/25/2025 09:39:56 AM,2017-05-11,Morgan,Campbell,2,John Anderson,(332) 723-2461,john.anderson@mail.com,Dorothy Brown,(935) 395-1762,Youth Medium,Youth Medium


In [10]:
# Break the two remaining full-name columns into first/last name columns.
# Only the first space separates the parts, so everything after it ends up in
# the last-name column.
name_sources = {"Parent Name": "Parent", "Emergency Contact Name": "Emergency Contact"}
for full_name_column, role in name_sources.items():
    name_pieces = track_data[full_name_column].str.split(" ", n=1, expand=True)
    track_data[[f"{role} First Name", f"{role} Last Name"]] = name_pieces

# The combined name columns are no longer needed
track_data.drop(columns=list(name_sources), inplace=True)

track_data.head(1)

Unnamed: 0,Date,Date of Birth,Student First Name,Student Last Name,Grade,Parent Phone Number,Parent Email,Emergency Contact Phone Number,Shirt Size,Short Size,Parent First Name,Parent Last Name,Emergency Contact First Name,Emergency Contact Last Name
0,02/21/2025 11:13:50 AM,2013-11-08,Riley,Adams,5,(361) 889-8429,jessica.clark@mail.com,(140) 685-5223,Youth Medium,Youth Medium,Jessica,Clark,Donald,Martinez


In [11]:
# Rebuild the column order now that the parent / emergency contact name
# columns have been appended at the end of the frame.
column_list = list(track_data.columns)

# (column, 0-based target position), applied one after another in this order.
# NOTE(review): with these positions each phone number column ends up ahead of
# the matching first/last name columns - confirm that is the intended layout.
target_positions = (
    ("Grade", 4),
    ("Parent First Name", 6),
    ("Parent Last Name", 7),
    ("Emergency Contact First Name", 10),
    ("Emergency Contact Last Name", 11),
)
for name, position in target_positions:
    column_list.remove(name)
    column_list.insert(position, name)

# Apply to data set
track_data = track_data[column_list]

track_data.head(1)

Unnamed: 0,Date,Date of Birth,Student First Name,Student Last Name,Grade,Parent Phone Number,Parent First Name,Parent Last Name,Parent Email,Emergency Contact Phone Number,Emergency Contact First Name,Emergency Contact Last Name,Shirt Size,Short Size
0,02/21/2025 11:13:50 AM,2013-11-08,Riley,Adams,5,(361) 889-8429,Jessica,Clark,jessica.clark@mail.com,(140) 685-5223,Donald,Martinez,Youth Medium,Youth Medium


In [12]:
# Read the new csv and the student_data.csv
# input_data_2 = "Resources/student_data.csv"
# student_data = pd.read_csv(input_data_2)
# student_data.head(1)

In [13]:
# Rename 'Student_Number' to 'Student #' in student_data
# student_data.rename(columns={"Student_Number": "Student #"}, inplace=True)
# student_data.head(1)

In [14]:
# Merge on the 'Student #' column (left join, so every row of track_data is kept)
# final_track_data = pd.merge(track_data, student_data[['Student #', 'Gender']], on='Student #', how='left')
# final_track_data.head(1)

In [15]:
# # Move 'Date' to the last column and 'Gender' after 'Grade'
# final_columns = final_track_data.columns.tolist()

# # Remove 'Date' and 'Gender' from the list and insert them at the desired positions
# final_columns.remove('Date')
# final_columns.remove('Gender')

# # Insert 'Gender' after 'Grade'
# final_columns.insert(final_columns.index('Grade') + 1, 'Gender')

# # Append 'Date' at the end
# final_columns.append('Date')

# # Reorder the DataFrame
# final_track_data = final_track_data[final_columns]
# final_track_data.head()

In [16]:
# Write the cleaned data to a new csv file, leaving out the DataFrame index.
# If the commented-out student_data merge cells above are re-enabled, write
# final_track_data here instead of track_data.
output_data = "Resources/final_track_data.csv"
track_data.to_csv(path_or_buf=output_data, index=False)