In [1]:
import os
orig_working_directory=os.getcwd()
orig_working_directory

'C:\\Users\\clemi\\Desktop\\Clementine-GWU-HW\\ETL_Project'

In [2]:
os.chdir(os.path.join('..'))
curr_working_directory=os.getcwd()
curr_working_directory

'C:\\Users\\clemi\\Desktop\\Clementine-GWU-HW'

## Import Dependencies

In [3]:
import json
from sqlalchemy import create_engine
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()

## Import configuration variables

In [4]:
from salesforce_config import sf_username, sf_password, sf_security_token
from salesforce_config import remote_db_endpoint, remote_db_port
from salesforce_config import remote_db_name, remote_db_user, remote_db_pwd

## Set up simple_salesforce

In [5]:
from simple_salesforce import Salesforce
sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)

## Connect to your MySQL database

In [6]:
engine = create_engine(f"mysql://{remote_db_user}:{remote_db_pwd}@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}")
conn = engine.connect()

### Drop Existing records in Student table Salesforce

In [7]:
student_records = sf.query("SELECT Id FROM Student__c")
recs_to_delete = [{'Id': r['Id']} for r in student_records['records']]
recs_to_delete

[]

In [8]:
sf.bulk.Student__c.delete(recs_to_delete)

[]

In [9]:
#Drop Existing records in Class Participant table Salesforce

In [10]:
class_part_records = sf.query("SELECT Id FROM ClassParticipant__c")
recs_to_delete = [{'Id': r['Id']} for r in class_part_records['records']]
recs_to_delete

SalesforceMalformedRequest: Malformed request https://na111.salesforce.com/services/data/v42.0/query/?q=SELECT+Id+FROM+ClassParticipant__c. Response content: [{'message': "\nSELECT Id FROM ClassParticipant__c\n               ^\nERROR at Row:1:Column:16\nsObject type 'ClassParticipant__c' is not supported. If you are attempting to use a custom object, be sure to append the '__c' after the entity name. Please reference your WSDL or the describe call for the appropriate names.", 'errorCode': 'INVALID_TYPE'}]

In [11]:
sf.bulk.ClassParticipant__c.delete(recs_to_delete)

SalesforceMalformedRequest: Malformed request https://na111.salesforce.com/services/async/42.0/job. Response content: {'exceptionCode': 'InvalidJob', 'exceptionMessage': 'Unable to find object: ClassParticipant__c'}

In [None]:
#Drop Existing records in Class table Salesforce

In [None]:
class_records = sf.query("SELECT Id FROM class__c")
recs_to_delete = [{'Id': r['Id']} for r in class_records['records']]
recs_to_delete

In [None]:
sf.bulk.class__c.delete(recs_to_delete)

In [None]:
#Drop Existing records in Course table Salesforce

In [12]:
course_records = sf.query("SELECT Id FROM Course__c")
recs_to_delete = [{'Id': r['Id']} for r in course_records['records']]
recs_to_delete

[]

In [13]:
sf.bulk.Course__c.delete(recs_to_delete)

[]

In [None]:
#Drop Existing records in Staff assignment table Salesforce

In [None]:
staff_assignment_records = sf.query("SELECT Id FROM StaffAssignment__c")
recs_to_delete = [{'Id': r['Id']} for r in staff_assignment_records['records']]
recs_to_delete

In [None]:
sf.bulk.StaffAssignment__c.delete(recs_to_delete)

In [None]:
#Drop Existing records in Staff table Salesforce

In [None]:
staff_records = sf.query("SELECT Id FROM Staff__c")
recs_to_delete = [{'Id': r['Id']} for r in staff_records['records']]
recs_to_delete

In [None]:
sf.bulk.Staff__c.delete(recs_to_delete)

## Query Data

In [None]:
#Course data 

In [14]:
course_data = pd.read_sql("SELECT * FROM course", conn)
course_data.head(2)

Unnamed: 0,ID_Course,CourseCode,CourseName,CreditHours,BootCampCourse
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1


In [15]:
#Rename Columns

In [16]:
course_data=course_data.rename(columns={"ID_Course":"Name","CourseCode":"CourseCode__c","CourseName":"CourseName__c",
                                       "CreditHours":"CreditHours__c","BootCampCourse":"BootCampCourse__c"})
course_data.head()

Unnamed: 0,Name,CourseCode__c,CourseName__c,CreditHours__c,BootCampCourse__c
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1
2,3,BC-UIUX,User Interface/User Experience,12,1
3,4,CIS-349,Introduction to Databases,5,0
4,5,CIS-405,Database Programming,5,0


In [17]:
#Convert DataFrame to a list of Dictionaries

In [18]:
course_load = course_data.to_dict('records')
course_load

[{'Name': 1,
  'CourseCode__c': 'BC-DATAVIZ',
  'CourseName__c': 'Data Visualization and Analytics',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'Name': 2,
  'CourseCode__c': 'BC-WEBDEV',
  'CourseName__c': 'Full Stack Web Development',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'Name': 3,
  'CourseCode__c': 'BC-UIUX',
  'CourseName__c': 'User Interface/User Experience',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'Name': 4,
  'CourseCode__c': 'CIS-349',
  'CourseName__c': 'Introduction to Databases',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'Name': 5,
  'CourseCode__c': 'CIS-405',
  'CourseName__c': 'Database Programming',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'Name': 6,
  'CourseCode__c': 'CIS-438',
  'CourseName__c': 'Database Administration',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'Name': 7,
  'CourseCode__c': 'CIS-430',
  'CourseName__c': 'Business Systems Programming I',
  'CreditHours__c': 5,
  'BootCampCourse__c'

In [19]:
#Inserting rows in bulk

In [20]:
try:
    sf.bulk.Course__c.insert(course_load)
except Exception as e:
    print(e)

# Class data

In [21]:
class_data = pd.read_sql("SELECT * FROM class", conn)
class_data.head(5)

Unnamed: 0,ID_Class,ID_Course,Section,StartDate,EndDate
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07


In [22]:
class_data=class_data.rename(columns={"ID_Class":"Name","ID_Course":"ID_Course__c","Section":"Section__c",
                                      "StartDate":"StartDate__c","EndDate":"EndDate__c"})

class_data.head(5)

Unnamed: 0,Name,ID_Course__c,Section__c,StartDate__c,EndDate__c
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07


In [23]:
class_data["StartDate__c"]=class_data["StartDate__c"].astype(str)
class_data["EndDate__c"]=class_data["EndDate__c"].astype(str)

In [24]:
#Extract Record ID

In [28]:
course_query=sf.query("SELECT Name from Course__c")

In [29]:
course_query

OrderedDict([('totalSize', 8),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'Course__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Course__c/a0G3h0000011zlUEAQ')])),
                            ('Name', '1')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Course__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Course__c/a0G3h0000011zlVEAQ')])),
                            ('Name', '2')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Course__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Course__c/a0G3h0000011zlWEAQ')])),
                        

In [31]:
course_dict={}
x=0
while x< len(course_query['records']):
    url=course_query['records'][x]['attributes']['url'].split('/')[6]
    course_id=int(course_query['records'][x]['Name'])
    print(url,course_id)
    course_dict[course_id]=url
    x=x+1
print(course_dict)

a0G3h0000011zlUEAQ 1
a0G3h0000011zlVEAQ 2
a0G3h0000011zlWEAQ 3
a0G3h0000011zlXEAQ 4
a0G3h0000011zlYEAQ 5
a0G3h0000011zlZEAQ 6
a0G3h0000011zlaEAA 7
a0G3h0000011zlbEAA 8
{1: 'a0G3h0000011zlUEAQ', 2: 'a0G3h0000011zlVEAQ', 3: 'a0G3h0000011zlWEAQ', 4: 'a0G3h0000011zlXEAQ', 5: 'a0G3h0000011zlYEAQ', 6: 'a0G3h0000011zlZEAQ', 7: 'a0G3h0000011zlaEAA', 8: 'a0G3h0000011zlbEAA'}


In [32]:
for index,row in class_data.iterrows():
    try:
        class_data['ID_Course__c'][index]=course_dict[row['ID_Course__c']]
        
    except KeyError as e:
        print(e)
class_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,Name,ID_Course__c,Section__c,StartDate__c,EndDate__c
0,1,a0G3h0000011zlUEAQ,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14
1,2,a0G3h0000011zlUEAQ,GWDC201805DATA3,2018-05-15,2018-11-08
2,3,a0G3h0000011zlVEAQ,GWARL201905WEB3,2019-05-14,2019-11-07
3,4,a0G3h0000011zlWEAQ,GWARL201905UIUX3,2019-05-14,2019-11-07


In [33]:
class_load = class_data.to_dict('records')
class_load

[{'Name': 1,
  'ID_Course__c': 'a0G3h0000011zlUEAQ',
  'Section__c': 'GWU-ARL-DATA-PT-09-0',
  'StartDate__c': '2020-03-16',
  'EndDate__c': '2020-03-14'},
 {'Name': 2,
  'ID_Course__c': 'a0G3h0000011zlUEAQ',
  'Section__c': 'GWDC201805DATA3',
  'StartDate__c': '2018-05-15',
  'EndDate__c': '2018-11-08'},
 {'Name': 3,
  'ID_Course__c': 'a0G3h0000011zlVEAQ',
  'Section__c': 'GWARL201905WEB3',
  'StartDate__c': '2019-05-14',
  'EndDate__c': '2019-11-07'},
 {'Name': 4,
  'ID_Course__c': 'a0G3h0000011zlWEAQ',
  'Section__c': 'GWARL201905UIUX3',
  'StartDate__c': '2019-05-14',
  'EndDate__c': '2019-11-07'}]

In [34]:
try:
    sf.bulk.class__c.insert(class_load)
except Exception as e:
    print(e)

# STUDENT DATA

In [None]:
#Transform Data

In [35]:
student_data = pd.read_sql("SELECT * FROM student", conn)
student_data.head(5)

Unnamed: 0,ID_Student,StudentID,LastName,FirstName,MiddleName,BirthDate,Gender
0,33,25004961,Moore,Heather,Alice,,F
1,34,25003514,Multak,Ilana,Cecille,,F
2,35,25005833,Murillo,Jessica,Dorothy,,F
3,36,25002589,Romanowski,Kandra,Genevieve,,F
4,37,25007185,Hoffer,Katherine,Lynnette,,F


In [36]:
#student_data['FullName'] = student_data['FirstName'].str.cat(student_data['LastName'], sep=" ")
student_data=student_data.rename(columns={"ID_Student":"Name","StudentID":"StudentID__c","LastName":"LastName__c",
                                       "FirstName":"FirstName__c","MiddleName":"MiddleName__c","Gender":"Gender__c",
                                        "BirthDate":"BirthDate__c"})
student_data.head(5)

Unnamed: 0,Name,StudentID__c,LastName__c,FirstName__c,MiddleName__c,BirthDate__c,Gender__c
0,33,25004961,Moore,Heather,Alice,,F
1,34,25003514,Multak,Ilana,Cecille,,F
2,35,25005833,Murillo,Jessica,Dorothy,,F
3,36,25002589,Romanowski,Kandra,Genevieve,,F
4,37,25007185,Hoffer,Katherine,Lynnette,,F


In [37]:
#Convert DataFrame to a list of Dictionaries

In [38]:
student_load = student_data.to_dict('records')
student_load

[{'Name': 33,
  'StudentID__c': '25004961',
  'LastName__c': 'Moore',
  'FirstName__c': 'Heather',
  'MiddleName__c': 'Alice',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'Name': 34,
  'StudentID__c': '25003514',
  'LastName__c': 'Multak',
  'FirstName__c': 'Ilana',
  'MiddleName__c': 'Cecille',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'Name': 35,
  'StudentID__c': '25005833',
  'LastName__c': 'Murillo',
  'FirstName__c': 'Jessica',
  'MiddleName__c': 'Dorothy',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'Name': 36,
  'StudentID__c': '25002589',
  'LastName__c': 'Romanowski',
  'FirstName__c': 'Kandra',
  'MiddleName__c': 'Genevieve',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'Name': 37,
  'StudentID__c': '25007185',
  'LastName__c': 'Hoffer',
  'FirstName__c': 'Katherine',
  'MiddleName__c': 'Lynnette',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'Name': 38,
  'StudentID__c': '25006014',
  'LastName__c': 'Poocharoen',
  'FirstName__c': 'Pariya',
  'MiddleName__c':

In [39]:
#Inserting rows in bulk

In [40]:
try:
    sf.bulk.Student__c.insert(student_load) #Student__c is the API name
except Exception as e:
    print(e)

### Class participant

In [54]:
participant_data = pd.read_sql("SELECT * FROM classparticipant", conn)
participant_data.head(4)

Unnamed: 0,ID_ClassParticipant,ID_Student,ID_Class,StartDate,EndDate
0,1,33,1,2020-03-16,
1,2,34,1,2020-03-16,
2,3,35,1,2020-03-16,
3,4,62,1,2020-03-16,


In [55]:
participant_data=participant_data.rename(columns={"ID_ClassParticipant":"Name","ID_Student":"ID_Student__c",
                                        "ID_Class":"ID_Class__c","StartDate":"StartDate__c","EndDate":"EndDate__c"})
#convert date to string otherwise we will get JSON nonserilizable error
participant_data["StartDate__c"]=participant_data["StartDate__c"].astype(str)
participant_data.head()

Unnamed: 0,Name,ID_Student__c,ID_Class__c,StartDate__c,EndDate__c
0,1,33,1,2020-03-16,
1,2,34,1,2020-03-16,
2,3,35,1,2020-03-16,
3,4,62,1,2020-03-16,
4,5,36,1,2020-03-16,


In [56]:
#Extract Record ID for student

In [57]:
student_query=sf.query("SELECT Name from Student__c")
student_query

OrderedDict([('totalSize', 30),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'Student__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Student__c/a0A3h000002c8GmEAI')])),
                            ('Name', '33')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Student__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Student__c/a0A3h000002c8GnEAI')])),
                            ('Name', '34')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Student__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Student__c/a0A3h000002c8GoEAI')])),
               

In [51]:
student_dict={}
x=0
while x< len(student_query['records']):
    url=student_query['records'][x]['attributes']['url'].split('/')[6]
    student_id=int(student_query['records'][x]['Name'])
    print(url,student_id)
    student_dict[student_id]=url
    x=x+1
print(student_dict)

a0A3h000002c8GmEAI 33
a0A3h000002c8GnEAI 34
a0A3h000002c8GoEAI 35
a0A3h000002c8GpEAI 36
a0A3h000002c8GqEAI 37
a0A3h000002c8GrEAI 38
a0A3h000002c8GsEAI 39
a0A3h000002c8GtEAI 40
a0A3h000002c8GuEAI 41
a0A3h000002c8GvEAI 42
a0A3h000002c8GwEAI 43
a0A3h000002c8GxEAI 44
a0A3h000002c8GyEAI 45
a0A3h000002c8GzEAI 46
a0A3h000002c8H0EAI 47
a0A3h000002c8H1EAI 48
a0A3h000002c8H2EAI 49
a0A3h000002c8H3EAI 50
a0A3h000002c8H4EAI 51
a0A3h000002c8H5EAI 52
a0A3h000002c8H6EAI 53
a0A3h000002c8H7EAI 54
a0A3h000002c8H8EAI 55
a0A3h000002c8H9EAI 56
a0A3h000002c8HAEAY 57
a0A3h000002c8HBEAY 58
a0A3h000002c8HCEAY 59
a0A3h000002c8HDEAY 60
a0A3h000002c8HEEAY 61
a0A3h000002c8HFEAY 62
{33: 'a0A3h000002c8GmEAI', 34: 'a0A3h000002c8GnEAI', 35: 'a0A3h000002c8GoEAI', 36: 'a0A3h000002c8GpEAI', 37: 'a0A3h000002c8GqEAI', 38: 'a0A3h000002c8GrEAI', 39: 'a0A3h000002c8GsEAI', 40: 'a0A3h000002c8GtEAI', 41: 'a0A3h000002c8GuEAI', 42: 'a0A3h000002c8GvEAI', 43: 'a0A3h000002c8GwEAI', 44: 'a0A3h000002c8GxEAI', 45: 'a0A3h000002c8GyEAI', 4

In [59]:
for index,row in participant_data.iterrows():
    participant_data['ID_Student__c'][index]=student_dict[row['ID_Student__c']]
participant_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Name,ID_Student__c,ID_Class__c,StartDate__c,EndDate__c
0,1,a0A3h000002c8GmEAI,1,2020-03-16,
1,2,a0A3h000002c8GnEAI,1,2020-03-16,
2,3,a0A3h000002c8GoEAI,1,2020-03-16,
3,4,a0A3h000002c8HFEAY,1,2020-03-16,
4,5,a0A3h000002c8GpEAI,1,2020-03-16,
5,6,a0A3h000002c8GqEAI,1,2020-03-16,
6,7,a0A3h000002c8GrEAI,1,2020-03-16,
7,8,a0A3h000002c8GsEAI,1,2020-03-16,
8,9,a0A3h000002c8GtEAI,1,2020-03-16,
9,10,a0A3h000002c8GuEAI,1,2020-03-16,


In [None]:
#Obtaining ID_Class record IDs and 

In [49]:
class_query=sf.query("SELECT Name from class__c")
class_query

OrderedDict([('totalSize', 4),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'class__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/class__c/a0F3h0000026uSZEAY')])),
                            ('Name', '1')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'class__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/class__c/a0F3h0000026uSaEAI')])),
                            ('Name', '2')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'class__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/class__c/a0F3h0000026uSbEAI')])),
                            ('

In [72]:
class_dict={}
x=0
while x< len(class_query['records']):
    url=class_query['records'][x]['attributes']['url'].split('/')[6]
    class_id=int(class_query['records'][x]['Name'])
    print(url,class_id)
    class_dict[class_id]=url
    x=x+1
print(class_dict)

a0F3h0000026uSZEAY 1
a0F3h0000026uSaEAI 2
a0F3h0000026uSbEAI 3
a0F3h0000026uScEAI 4
{1: 'a0F3h0000026uSZEAY', 2: 'a0F3h0000026uSaEAI', 3: 'a0F3h0000026uSbEAI', 4: 'a0F3h0000026uScEAI'}


In [73]:
for index,row in participant_data.iterrows():
    try:
        participant_data['ID_Class__c'][index]=class_dict[row['ID_Class__c']]
    except KeyError as e:
        print(e)
participant_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Name,ID_Student__c,ID_Class__c,StartDate__c,EndDate__c
0,1,a0A3h000002c8GmEAI,a0F3h0000026uSZEAY,2020-03-16,
1,2,a0A3h000002c8GnEAI,a0F3h0000026uSZEAY,2020-03-16,
2,3,a0A3h000002c8GoEAI,a0F3h0000026uSZEAY,2020-03-16,
3,4,a0A3h000002c8HFEAY,a0F3h0000026uSZEAY,2020-03-16,
4,5,a0A3h000002c8GpEAI,a0F3h0000026uSZEAY,2020-03-16,
5,6,a0A3h000002c8GqEAI,a0F3h0000026uSZEAY,2020-03-16,
6,7,a0A3h000002c8GrEAI,a0F3h0000026uSZEAY,2020-03-16,
7,8,a0A3h000002c8GsEAI,a0F3h0000026uSZEAY,2020-03-16,
8,9,a0A3h000002c8GtEAI,a0F3h0000026uSZEAY,2020-03-16,
9,10,a0A3h000002c8GuEAI,a0F3h0000026uSZEAY,2020-03-16,


In [74]:
participant_load = participant_data.to_dict('records')
participant_load

[{'Name': 1,
  'ID_Student__c': 'a0A3h000002c8GmEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 2,
  'ID_Student__c': 'a0A3h000002c8GnEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 3,
  'ID_Student__c': 'a0A3h000002c8GoEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 4,
  'ID_Student__c': 'a0A3h000002c8HFEAY',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 5,
  'ID_Student__c': 'a0A3h000002c8GpEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 6,
  'ID_Student__c': 'a0A3h000002c8GqEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 7,
  'ID_Student__c': 'a0A3h000002c8GrEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'StartDate__c': '

In [75]:
#Inserting rows in bulk

In [76]:
try:
    sf.bulk.ClassParticipant__c.insert(participant_load)
except Exception as e:
    print(e)

# Staff

In [77]:
staff_data = pd.read_sql("SELECT * FROM staff", conn)
staff_data.head(5)

Unnamed: 0,ID_Staff,EmployeeID,LastName,FirstName,MiddleName,BirthDate
0,1,184220,Wimberly,Sam,Nico,1995-07-05
1,2,130109,Sanford,Gemini,Blair,1992-04-22
2,3,160655,Williams,Dartanion,De Angelo,1993-05-21
3,4,159108,Popelka,Sarah,Nisan,1994-06-07


In [78]:
#staff_data['FullName'] = staff_data['FirstName'].str.cat(staff_data['LastName'], sep=" ")
staff_data=staff_data.rename(columns={"ID_Staff":"Name","EmployeeID":"EmployeeID__c","LastName":"LastName__c",
                                       "FirstName":"FirstName__c","MiddleName":"MiddleName__c",
                                      "BirthDate":"BirthDate__c"})
#convert date to string otherwise we will get JSON nonserilizable error
staff_data["BirthDate__c"]=staff_data["BirthDate__c"].astype(str)
staff_data.head()

Unnamed: 0,Name,EmployeeID__c,LastName__c,FirstName__c,MiddleName__c,BirthDate__c
0,1,184220,Wimberly,Sam,Nico,1995-07-05
1,2,130109,Sanford,Gemini,Blair,1992-04-22
2,3,160655,Williams,Dartanion,De Angelo,1993-05-21
3,4,159108,Popelka,Sarah,Nisan,1994-06-07


In [79]:
#staff_data['FullName'] = staff_data['FirstName'].str.cat(staff_data['LastName'], sep=" ")
staff_data=staff_data.rename(columns={"ID_Staff":"ID_Staff__c","EmployeeID":"EmployeeID__c","LastName":"Name",
                                       "FirstName":"FirstName__c","MiddleName":"MiddleName__c",
                                      "BirthDate":"BirthDate__c"})
#convert date to string otherwise we will get JSON nonserilizable error
staff_data["BirthDate__c"]=staff_data["BirthDate__c"].astype(str)
staff_data.head()

Unnamed: 0,Name,EmployeeID__c,LastName__c,FirstName__c,MiddleName__c,BirthDate__c
0,1,184220,Wimberly,Sam,Nico,1995-07-05
1,2,130109,Sanford,Gemini,Blair,1992-04-22
2,3,160655,Williams,Dartanion,De Angelo,1993-05-21
3,4,159108,Popelka,Sarah,Nisan,1994-06-07


In [80]:
staff_load = staff_data.to_dict('records')
staff_load

[{'Name': 1,
  'EmployeeID__c': '000184220',
  'LastName__c': 'Wimberly',
  'FirstName__c': 'Sam',
  'MiddleName__c': 'Nico',
  'BirthDate__c': '1995-07-05'},
 {'Name': 2,
  'EmployeeID__c': '000130109',
  'LastName__c': 'Sanford',
  'FirstName__c': 'Gemini',
  'MiddleName__c': 'Blair',
  'BirthDate__c': '1992-04-22'},
 {'Name': 3,
  'EmployeeID__c': '000160655',
  'LastName__c': 'Williams',
  'FirstName__c': 'Dartanion',
  'MiddleName__c': 'De Angelo',
  'BirthDate__c': '1993-05-21'},
 {'Name': 4,
  'EmployeeID__c': '000159108',
  'LastName__c': 'Popelka',
  'FirstName__c': 'Sarah',
  'MiddleName__c': 'Nisan',
  'BirthDate__c': '1994-06-07'}]

In [81]:
try:
    sf.bulk.Staff__c.insert(staff_load)
except Exception as e:
    print(e)

# Staff Assignment

In [82]:
staffassignment_data = pd.read_sql("SELECT * FROM staffassignment", conn)
staffassignment_data

Unnamed: 0,ID_StaffAssignment,ID_Staff,ID_Class,Role,StartDate,EndDate
0,1,1,1,Teacher Assistant,2020-03-16,
1,2,4,1,Teacher Assistant,2020-03-16,
2,3,3,1,Instructor,2020-03-16,


## Rename Columns

In [83]:
staffassignment_data=staffassignment_data.rename(columns={"ID_StaffAssignment":"Name",
                                                          "ID_Staff":"ID_Staff__c", "ID_Class":"ID_Class__c","Role":"Role__c",
                                                           "StartDate":"StartDate__c","EndDate":"EndDate__c"})
#convert date to string otherwise we will get JSON nonserilizable error
staffassignment_data["StartDate__c"]=staffassignment_data["StartDate__c"].astype(str)

staffassignment_data.head()

Unnamed: 0,Name,ID_Staff__c,ID_Class__c,Role__c,StartDate__c,EndDate__c
0,1,1,1,Teacher Assistant,2020-03-16,
1,2,4,1,Teacher Assistant,2020-03-16,
2,3,3,1,Instructor,2020-03-16,


## Convert DataFrame to a list of Dictionaries

In [84]:
staff_query=sf.query("SELECT Name from Staff__c")

In [85]:
staff_query

OrderedDict([('totalSize', 4),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'Staff__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Staff__c/a0I3h0000007dPnEAI')])),
                            ('Name', '1')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Staff__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Staff__c/a0I3h0000007dPoEAI')])),
                            ('Name', '2')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Staff__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Staff__c/a0I3h0000007dPpEAI')])),
                            ('

In [87]:
staff_dict={}
x=0
while x< len(staff_query['records']):
    url=staff_query['records'][x]['attributes']['url'].split('/')[6]
    staff_id=int(staff_query['records'][x]['Name'])
    print(url,staff_id)
    staff_dict[staff_id]=url
    x=x+1
print(staff_dict)

a0I3h0000007dPnEAI 1
a0I3h0000007dPoEAI 2
a0I3h0000007dPpEAI 3
a0I3h0000007dPqEAI 4
{1: 'a0I3h0000007dPnEAI', 2: 'a0I3h0000007dPoEAI', 3: 'a0I3h0000007dPpEAI', 4: 'a0I3h0000007dPqEAI'}


In [88]:
for index,row in staffassignment_data.iterrows():
    try:
        staffassignment_data['ID_Staff__c'][index]=staff_dict[row['ID_Staff__c']]
    except KeyError as e:
        print(e)
staffassignment_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Name,ID_Staff__c,ID_Class__c,Role__c,StartDate__c,EndDate__c
0,1,a0I3h0000007dPnEAI,1,Teacher Assistant,2020-03-16,
1,2,a0I3h0000007dPqEAI,1,Teacher Assistant,2020-03-16,
2,3,a0I3h0000007dPpEAI,1,Instructor,2020-03-16,


In [89]:
class_query=sf.query("SELECT Name from class__c")
class_query

OrderedDict([('totalSize', 4),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'class__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/class__c/a0F3h0000026uSZEAY')])),
                            ('Name', '1')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'class__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/class__c/a0F3h0000026uSaEAI')])),
                            ('Name', '2')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'class__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/class__c/a0F3h0000026uSbEAI')])),
                            ('

In [90]:
class_dict={}
x=0
while x< len(class_query['records']):
    url=class_query['records'][x]['attributes']['url'].split('/')[6]
    class_id=int(class_query['records'][x]['Name'])
    print(url,class_id)
    class_dict[class_id]=url
    x=x+1
print(class_dict)

a0F3h0000026uSZEAY 1
a0F3h0000026uSaEAI 2
a0F3h0000026uSbEAI 3
a0F3h0000026uScEAI 4
{1: 'a0F3h0000026uSZEAY', 2: 'a0F3h0000026uSaEAI', 3: 'a0F3h0000026uSbEAI', 4: 'a0F3h0000026uScEAI'}


In [91]:
for index,row in staffassignment_data.iterrows():
    staffassignment_data['ID_Class__c'][index]=class_dict[row['ID_Class__c']]
staffassignment_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Name,ID_Staff__c,ID_Class__c,Role__c,StartDate__c,EndDate__c
0,1,a0I3h0000007dPnEAI,a0F3h0000026uSZEAY,Teacher Assistant,2020-03-16,
1,2,a0I3h0000007dPqEAI,a0F3h0000026uSZEAY,Teacher Assistant,2020-03-16,
2,3,a0I3h0000007dPpEAI,a0F3h0000026uSZEAY,Instructor,2020-03-16,


In [92]:
staffassignment_load = staffassignment_data.to_dict('records')
staffassignment_load 

[{'Name': 1,
  'ID_Staff__c': 'a0I3h0000007dPnEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'Role__c': 'Teacher Assistant',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 2,
  'ID_Staff__c': 'a0I3h0000007dPqEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'Role__c': 'Teacher Assistant',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 3,
  'ID_Staff__c': 'a0I3h0000007dPpEAI',
  'ID_Class__c': 'a0F3h0000026uSZEAY',
  'Role__c': 'Instructor',
  'StartDate__c': '2020-03-16',
  'EndDate__c': None}]

## Inserting rows in bulk

In [93]:
try:
    sf.bulk.StaffAssignment__c.insert(staffassignment_load)
except Exception as e:
    print(e)