In [None]:
#!pip install simple_salesforce
#!pip install pymysql
#!pip install sqlalchemy

In [None]:
import json
from sqlalchemy import create_engine
from datetime import date
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()

In [None]:
# Use your own `config.py` file, and make sure the variable names it defines match the names imported below.
from config import sf_username, sf_password, sf_security_token
from config import remote_db_endpoint, remote_db_port
from config import remote_db_name, remote_db_user, remote_db_pwd

In [None]:
from simple_salesforce import Salesforce

# Open a Salesforce session with the credentials imported from `config.py`.
sf = Salesforce(
    username=sf_username,
    password=sf_password,
    security_token=sf_security_token,
)

In [None]:
# Assemble the MySQL connection URL from the `config.py` settings, then open
# the connection that every `pd.read_sql` call below reuses.
mysql_url = f"mysql://{remote_db_user}:{remote_db_pwd}@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}"
engine = create_engine(mysql_url)
conn = engine.connect()

## Prepare ETL for the Course data

In [None]:
# Load the whole `course` table from MySQL and preview up to 30 rows.
course_data_df = pd.read_sql(sql="SELECT * FROM course", con=conn)
course_data_df.head(n=30)

In [None]:
# Map the MySQL column names onto the Salesforce field names used for loading.
course_field_names = {
    'CourseCode': 'Course_Code__c',
    'CourseName': 'Course_Name__c',
    'CreditHours': 'Credit_Hours__c',
    'BootCampCourse': 'Boot_Camp_Course__c',
}
course_data_df = course_data_df.rename(columns=course_field_names)
course_data_df.head()

In [None]:
# Keep only the four columns that are sent to Salesforce, in this order.
course_data_df = course_data_df.loc[
    :,
    ['Course_Code__c', 'Course_Name__c', 'Credit_Hours__c', 'Boot_Camp_Course__c'],
]
course_data_df.head()

In [None]:
# One dict per course row, keyed by column name.
course_data_records = course_data_df.to_dict(orient='records')
course_data_records

In [None]:
# Create one `Course__c` record in Salesforce per course row.
# A failed insert is printed and the loop carries on with the next row.
course_fields = ('Course_Code__c', 'Course_Name__c', 'Credit_Hours__c', 'Boot_Camp_Course__c')

for rec in course_data_records:
    # Send only the expected fields, even if the row dict carries more.
    record = {field: rec[field] for field in course_fields}

    try:
        sf.Course__c.create(record)
    except Exception as e:
        print(e)

In [None]:
# Bulk 
# sf.bulk.Course__c.insert(course_data_records)

## Create Course Lookup Table
You will use this later to crosswalk the course code with the primary key from the `Course` table

It is important to note that we will be querying **Salesforce** to retrieve the record IDs.

In [None]:
# Query Salesforce with SOQL (the Salesforce query language) for every
# `Course__c` row and keep the two values needed for the crosswalk.
#
# Critical detail: the row's `Name` value is stored under the `ID_Course__c`
# key, i.e. it is the value later written to classes to point at their course.
# NOTE(review): this presumably relies on `Name` holding the record identifier
# in this org — confirm before reusing elsewhere.
course_lookup_list = [
    {
        'ID_Course__c': row['Name'],
        'Course_Code__c': row['Course_Code__c'],
    }
    for row in sf.query_all_iter("SELECT Name, Course_Code__c FROM Course__c")
]

course_lookup_list

In [None]:
# Turn the list of lookup dicts into a two-column DataFrame for merging.
course_lookup_df = pd.DataFrame.from_records(course_lookup_list)
course_lookup_df

In [None]:
# Read every row of the MySQL `class` table together with the `CourseCode`
# of its parent course (inner join on `ID_Course`).
class_query = '''
    SELECT 
        co.CourseCode,
        cl.*
    FROM 
        class cl
        INNER JOIN course co
        ON cl.ID_Course = co.ID_Course

'''

class_data_df = pd.read_sql(sql=class_query, con=conn)
class_data_df.head()

In [None]:
# Map the MySQL column names onto the Salesforce field names.
# (The original mapping listed 'CourseCode' twice; each source column now
# appears exactly once.)
class_data_df = class_data_df.rename(columns={
    'CourseCode': 'Course_Code__c',
    'Section': 'Section__c',
    'StartDate': 'Start_Date__c',
    'EndDate': 'End_Date__c',
})

# Keep only the columns needed for the Salesforce load. `.copy()` makes this
# an independent frame so later column assignments do not act on a slice of
# the query result (which triggers pandas' SettingWithCopyWarning).
class_data_df = class_data_df[['Course_Code__c', 'Section__c', 'Start_Date__c', 'End_Date__c']].copy()
class_data_df.head()

## Join the Class DataFrame with the Course lookup table
This join is necessary to look up the foreign key that links each class to its record in the Course table.

In [None]:
# Left-join the course lookup onto the class rows so each class gains an
# `ID_Course__c` value. No `on=` is given, so pandas joins on the columns the
# two frames share.
class_data_df = class_data_df.merge(course_lookup_df, how='left')

class_data_df.head()

In [None]:
# Convert both date columns to plain strings before building the records
# that are sent to Salesforce.
class_data_df = class_data_df.astype({'Start_Date__c': str, 'End_Date__c': str})

class_data_df.head()

In [None]:
# One dict per class row, keyed by column name.
class_data_records = class_data_df.to_dict('records')
class_data_records

## Insert `Class` Records into Salesforce

In [None]:
# Create one `Class__c` record in Salesforce per class row.
# A failed insert is printed and the loop carries on with the next row.
for rec in class_data_records:

    # Send only the fields being loaded; `Course_Code__c` was needed solely
    # to look up `ID_Course__c` and is left out.
    record = {
        'ID_Course__c': rec['ID_Course__c'],
        'Section__c': rec['Section__c'],
        'Start_Date__c': rec['Start_Date__c'],
        'End_Date__c': rec['End_Date__c'],
    }

    try:
        # Object name spelled `Class__c`, matching every other reference in
        # this notebook (the original used `Class__C` here).
        sf.Class__c.create(record)
    except Exception as e:
        print(e)

# Query `Student` Records from MySQL

In [None]:
# Read the student table from MySQL. The column aliases already match the
# Salesforce field names, so no rename step is needed afterwards.
student_query = '''
    SELECT 
        StudentID AS Student_ID__c,
        LastName AS Last_Name__c,
        FirstName AS First_Name__c,
        MiddleName AS Middle_Name__c,
        BirthDate AS Birth_Date__c,
        Gender AS Gender__c
    FROM
        student
'''

student_df = pd.read_sql(sql=student_query, con=conn)

# Printed because this preview is not the last expression in the cell.
print(student_df.head())

# One dict per student row, keyed by Salesforce field name.
# NOTE(review): `Birth_Date__c` is passed on exactly as MySQL returned it —
# confirm its values are acceptable to Salesforce without conversion.
student_dict = student_df.to_dict('records')

student_dict

In [None]:
# Create one `Student__c` record in Salesforce per student row.
# The try/except sits inside the loop (as in the other insert loops in this
# notebook) so one failing record is printed and the rest are still inserted.
# Previously the first failure ended the whole loop.
for rec in student_dict:
    try:
        sf.Student__c.create(rec)
    except Exception as e:
        print(e)

# Class Participant Example 

In [None]:
# Build the course crosswalk from Salesforce: course `Name` (stored as
# `ID_Course__c`) alongside the course code.
# `query_all` follows result paging, whereas `query` returns only the first
# batch of rows; this matches the earlier lookup that uses `query_all_iter`.
sf_course_data = sf.query_all('SELECT Name, Course_Code__c FROM Course__c')

sf_course_records = [
    {
        'ID_Course__c': row['Name'],
        'Course_Code__c': row['Course_Code__c'],
    }
    for row in sf_course_data['records']
]

sf_course_df = pd.DataFrame(sf_course_records)
sf_course_df.head()

In [None]:
# Build the class crosswalk from Salesforce: parent course reference, class
# `Name` (stored as `ID_Class__c`) and section.
# `query_all` follows result paging, whereas `query` returns only the first
# batch of rows.
sf_class_data = sf.query_all('SELECT ID_Course__c, Name, Section__c FROM Class__c')

sf_class_records = [
    {
        # Temporary workaround kept from the original: drop the last three
        # characters of ID_Course__c.
        # NOTE(review): presumably this shortens the value so it matches the
        # course `Name` used as the join key in the next cell — confirm.
        'ID_Course__c': row['ID_Course__c'][:-3],
        'ID_Class__c': row['Name'],
        'Section__c': row['Section__c'],
    }
    for row in sf_class_data['records']
]

sf_class_df = pd.DataFrame(sf_class_records)
sf_class_df.head()

In [None]:
# Join classes to courses on their shared column(s) (default inner join) so
# each class row also carries its course code.
sf_class_xwalk = sf_class_df.merge(sf_course_df)
sf_class_xwalk

In [None]:
# Build the student crosswalk from Salesforce: student `Name` (stored as
# `ID_Student__c`) alongside the source-system student ID.
# `query_all` follows result paging, whereas `query` returns only the first
# batch of rows — student tables are the most likely to exceed one batch.
sf_student_data = sf.query_all('SELECT Name, Student_ID__c FROM Student__c')

sf_student_records = [
    {
        'ID_Student__c': row['Name'],
        'Student_ID__c': row['Student_ID__c'],
    }
    for row in sf_student_data['records']
]

sf_student_xwalk = pd.DataFrame(sf_student_records)
sf_student_xwalk.head()

In [None]:
# Read every class-participant row from MySQL, resolved to the natural keys
# (student ID, course code, section) that the Salesforce crosswalks use.
participant_query = '''

    SELECT
        s.StudentID AS Student_ID__c,
        co.CourseCode AS Course_Code__c,
        cl.Section AS Section__c,
        cp.StartDate AS Start_Date__c,
        cp.EndDate AS End_Date__c
    FROM 
        classparticipant cp
        INNER JOIN class cl
        ON cp.ID_Class = cl.ID_Class
        INNER JOIN course co
        ON cl.ID_Course = co.ID_Course
        INNER JOIN student s
        ON cp.ID_Student = s.ID_Student

'''

classparticiant_df = pd.read_sql(sql=participant_query, con=conn)
classparticiant_df.head()

In [None]:
# Resolve each participant row to its Salesforce class and student values.
# 1) Match on course code + section to pick up `ID_Class__c`.
class_participant_load = pd.merge(classparticiant_df, sf_class_xwalk, on=['Course_Code__c', 'Section__c'])

# 2) Match on the columns shared with the student crosswalk to pick up
#    `ID_Student__c`.
class_participant_load = pd.merge(class_participant_load, sf_student_xwalk)

# Keep only the fields that are loaded. `.copy()` makes this an independent
# frame so the column assignments below do not act on a slice.
class_participant_load = class_participant_load[['ID_Student__c','ID_Class__c','Start_Date__c','End_Date__c']].copy()

# Convert BOTH date columns to text (the original converted only
# `Start_Date__c`, unlike the class load earlier in this notebook).
# Missing dates are kept as None instead of becoming the literal strings
# "None"/"NaT", so they are sent to Salesforce as empty values.
for date_col in ('Start_Date__c', 'End_Date__c'):
    date_values = class_participant_load[date_col]
    class_participant_load[date_col] = [
        None if is_missing else text
        for text, is_missing in zip(date_values.astype(str), date_values.isna())
    ]

class_participant_records = class_participant_load.to_dict(orient='records')
class_participant_records

In [None]:
# Create one `Class_Participant__c` record in Salesforce per participant row.
# A failed insert is printed and the loop carries on with the next row.
for participant in class_participant_records:
    try:
        sf.Class_Participant__c.create(participant)
    except Exception as err:
        print(err)

## Example of Deleting Records

Select the IDs of the records first and then process the results.

Ultimately, you want a list of IDs in the end.


In [None]:
# Collect the IDs of every Student__c record as a list of {'Id': ...} dicts.
# - Selects the `Id` field directly instead of relabelling `Name` as an Id.
# - `query_all` follows result paging; `query` returns only the first batch.
# - The result variable is named for what it holds (students, not classes).
student_records = sf.query_all("SELECT Id FROM Student__c")
recs_to_delete = [{'Id': r['Id']} for r in student_records['records']]
recs_to_delete

In [None]:
#sf.bulk.Course__c.delete(recs_to_delete)

In [None]:
# Delete the collected Student__c records one at a time.
# A failed delete is printed and the loop carries on with the next record.
for target in recs_to_delete:
    try:
        sf.Student__c.delete(target['Id'])
    except Exception as err:
        print(err)

In [None]:
# Bulk-delete every Staff_Assignment__c record.
# Selects the `Id` field directly (the bulk delete payload is keyed on 'Id')
# and uses `query_all` so records beyond the first result batch are included.
sa_records = sf.query_all("SELECT Id FROM Staff_Assignment__c")
recs_to_delete = [{'Id': r['Id']} for r in sa_records['records']]
sf.bulk.Staff_Assignment__c.delete(recs_to_delete)

In [None]:
# Bulk-delete every Class_Participant__c record.
# Selects the `Id` field directly (the bulk delete payload is keyed on 'Id')
# and uses `query_all` so records beyond the first result batch are included.
cp_records = sf.query_all("SELECT Id FROM Class_Participant__c")
recs_to_delete = [{'Id': r['Id']} for r in cp_records['records']]
sf.bulk.Class_Participant__c.delete(recs_to_delete)

In [None]:
# Bulk-delete every Class__c record.
# Selects the `Id` field directly (the bulk delete payload is keyed on 'Id')
# and uses `query_all` so records beyond the first result batch are included.
class_records = sf.query_all("SELECT Id FROM Class__c")
recs_to_delete = [{'Id': r['Id']} for r in class_records['records']]
sf.bulk.Class__c.delete(recs_to_delete)

In [None]:
# Bulk-delete every Staff__c record.
# Selects the `Id` field directly (the bulk delete payload is keyed on 'Id')
# and uses `query_all` so records beyond the first result batch are included.
staff_records = sf.query_all("SELECT Id FROM Staff__c")
recs_to_delete = [{'Id': r['Id']} for r in staff_records['records']]
sf.bulk.Staff__c.delete(recs_to_delete)

In [None]:
# Bulk-delete every Student__c record.
# Fix: the delete list is now built from `student_records` (the result of this
# cell's query). The original iterated `class_records`, left over from the
# Class__c cell, so it sent Class ids to the Student delete.
# Also selects `Id` directly and uses `query_all` so records beyond the first
# result batch are included.
student_records = sf.query_all("SELECT Id FROM Student__c")
recs_to_delete = [{'Id': r['Id']} for r in student_records['records']]
sf.bulk.Student__c.delete(recs_to_delete)

In [None]:
# Bulk-delete every Course__c record.
# Selects the `Id` field directly (the bulk delete payload is keyed on 'Id')
# and uses `query_all` so records beyond the first result batch are included.
course_records = sf.query_all("SELECT Id FROM Course__c")
recs_to_delete = [{'Id': r['Id']} for r in course_records['records']]
sf.bulk.Course__c.delete(recs_to_delete)