In [None]:
#!pip install simple_salesforce #allows us to work with salesforce API

In [1]:
import os
orig_working_directory=os.getcwd()
orig_working_directory

'C:\\Users\\clemi\\Desktop\\Clementine-GWU-HW\\ETL_Project'

In [2]:
os.chdir(os.path.join('..'))
curr_working_directory=os.getcwd()
curr_working_directory

'C:\\Users\\clemi\\Desktop\\Clementine-GWU-HW'

## Import Dependencies

In [3]:
import json
from sqlalchemy import create_engine
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()

## Import configuration variables

In [4]:
from salesforce_config import sf_username, sf_password, sf_security_token
from salesforce_config import remote_db_endpoint, remote_db_port
from salesforce_config import remote_db_name, remote_db_user, remote_db_pwd

## Set up simple_salesforce

In [5]:
from simple_salesforce import Salesforce
sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)

## Connect to your MySQL database

In [6]:
engine = create_engine(f"mysql://{remote_db_user}:{remote_db_pwd}@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}")
conn = engine.connect()

## Query Data

In [7]:
student_data = pd.read_sql("SELECT * FROM student", conn)
student_data.head(5)

Unnamed: 0,ID_Student,StudentID,LastName,FirstName,MiddleName,BirthDate,Gender
0,33,25004961,Moore,Heather,Alice,,F
1,34,25003514,Multak,Ilana,Cecille,,F
2,35,25005833,Murillo,Jessica,Dorothy,,F
3,36,25002589,Romanowski,Kandra,Genevieve,,F
4,37,25007185,Hoffer,Katherine,Lynnette,,F


## Transform Data

In [8]:
#student_data['FullName'] = student_data['FirstName'].str.cat(student_data['LastName'], sep=" ")
student_data=student_data.rename(columns={"ID_Student":"ID_Student__c","StudentID":"StudentID__c","LastName":"Name",
                                       "FirstName":"FirstName__c","MiddleName":"MiddleName__c","Gender":"Gender__c",
                                        "BirthDate":"BirthDate__c"})
student_data.head(5)

Unnamed: 0,ID_Student__c,StudentID__c,Name,FirstName__c,MiddleName__c,BirthDate__c,Gender__c
0,33,25004961,Moore,Heather,Alice,,F
1,34,25003514,Multak,Ilana,Cecille,,F
2,35,25005833,Murillo,Jessica,Dorothy,,F
3,36,25002589,Romanowski,Kandra,Genevieve,,F
4,37,25007185,Hoffer,Katherine,Lynnette,,F


## Convert DataFrame to a list of Dictionaries

In [9]:
student_load = student_data.to_dict('records')
student_load

[{'ID_Student__c': 33,
  'StudentID__c': '25004961',
  'Name': 'Moore',
  'FirstName__c': 'Heather',
  'MiddleName__c': 'Alice',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'ID_Student__c': 34,
  'StudentID__c': '25003514',
  'Name': 'Multak',
  'FirstName__c': 'Ilana',
  'MiddleName__c': 'Cecille',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'ID_Student__c': 35,
  'StudentID__c': '25005833',
  'Name': 'Murillo',
  'FirstName__c': 'Jessica',
  'MiddleName__c': 'Dorothy',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'ID_Student__c': 36,
  'StudentID__c': '25002589',
  'Name': 'Romanowski',
  'FirstName__c': 'Kandra',
  'MiddleName__c': 'Genevieve',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'ID_Student__c': 37,
  'StudentID__c': '25007185',
  'Name': 'Hoffer',
  'FirstName__c': 'Katherine',
  'MiddleName__c': 'Lynnette',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'ID_Student__c': 38,
  'StudentID__c': '25006014',
  'Name': 'Poocharoen',
  'FirstName__c': 'Pariya',
  'Mid

### Inserting rows in bulk

In [10]:
try:
    sf.bulk.Student__c.insert(student_load) #Student__c is the API name
except Exception as e:
    print(e)

## Class Participant 

### Query Data

In [11]:
participant_data = pd.read_sql("SELECT * FROM classparticipant", conn)
participant_data.head(30)

Unnamed: 0,ID_ClassParticipant,ID_Student,ID_Class,StartDate,EndDate
0,1,33,1,2020-03-16,
1,2,34,1,2020-03-16,
2,3,35,1,2020-03-16,
3,4,62,1,2020-03-16,
4,5,36,1,2020-03-16,
5,6,37,1,2020-03-16,
6,7,38,1,2020-03-16,
7,8,39,1,2020-03-16,
8,9,40,1,2020-03-16,
9,10,41,1,2020-03-16,


### Rename Columns

In [12]:
participant_data=participant_data.rename(columns={"ID_ClassParticipant":"Name","ID_Student":"ID_Student__c",
                                        "ID_Class":"ID_Class__c","StartDate":"StartDate__c","EndDate":"EndDate__c"})
#convert date to string otherwise we will get JSON nonserilizable error
participant_data["StartDate__c"]=participant_data["StartDate__c"].astype(str)
participant_data.head()

Unnamed: 0,Name,ID_Student__c,ID_Class__c,StartDate__c,EndDate__c
0,1,33,1,2020-03-16,
1,2,34,1,2020-03-16,
2,3,35,1,2020-03-16,
3,4,62,1,2020-03-16,
4,5,36,1,2020-03-16,


### Extract Record ID 

In [13]:
query=sf.query("SELECT ID_Student__c from Student__c")

In [14]:
query

OrderedDict([('totalSize', 30),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'Student__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Student__c/a0A3h000002c6SBEAY')])),
                            ('ID_Student__c', '33')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Student__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Student__c/a0A3h000002c6SCEAY')])),
                            ('ID_Student__c', '34')]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Student__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Student__c/a0A3h000002c6SDEAY')])

### Extract Student table record numbers

In [15]:
student_dict={}
x=0
while x< len(query['records']):
    url=query['records'][x]['attributes']['url'].split('/')[6]
    student_id=int(query['records'][x]['ID_Student__c'])
    print(url,student_id)
    student_dict[student_id]=url
    x=x+1
print(student_dict)

a0A3h000002c6SBEAY 33
a0A3h000002c6SCEAY 34
a0A3h000002c6SDEAY 35
a0A3h000002c6SEEAY 36
a0A3h000002c6SFEAY 37
a0A3h000002c6SGEAY 38
a0A3h000002c6SHEAY 39
a0A3h000002c6SIEAY 40
a0A3h000002c6SJEAY 41
a0A3h000002c6SKEAY 42
a0A3h000002c6SLEAY 43
a0A3h000002c6SMEAY 44
a0A3h000002c6SNEAY 45
a0A3h000002c6SOEAY 46
a0A3h000002c6SPEAY 47
a0A3h000002c6SQEAY 48
a0A3h000002c6SREAY 49
a0A3h000002c6SSEAY 50
a0A3h000002c6STEAY 51
a0A3h000002c6SUEAY 52
a0A3h000002c6SVEAY 53
a0A3h000002c6SWEAY 54
a0A3h000002c6SXEAY 55
a0A3h000002c6SYEAY 56
a0A3h000002c6SZEAY 57
a0A3h000002c6SaEAI 58
a0A3h000002c6SbEAI 59
a0A3h000002c6ScEAI 60
a0A3h000002c6SdEAI 61
a0A3h000002c6SeEAI 62
{33: 'a0A3h000002c6SBEAY', 34: 'a0A3h000002c6SCEAY', 35: 'a0A3h000002c6SDEAY', 36: 'a0A3h000002c6SEEAY', 37: 'a0A3h000002c6SFEAY', 38: 'a0A3h000002c6SGEAY', 39: 'a0A3h000002c6SHEAY', 40: 'a0A3h000002c6SIEAY', 41: 'a0A3h000002c6SJEAY', 42: 'a0A3h000002c6SKEAY', 43: 'a0A3h000002c6SLEAY', 44: 'a0A3h000002c6SMEAY', 45: 'a0A3h000002c6SNEAY', 4

In [16]:
for index,row in participant_data.iterrows():
    participant_data['ID_Student__c'][index]=student_dict[row['ID_Student__c']]
participant_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,Name,ID_Student__c,ID_Class__c,StartDate__c,EndDate__c
0,1,a0A3h000002c6SBEAY,1,2020-03-16,
1,2,a0A3h000002c6SCEAY,1,2020-03-16,
2,3,a0A3h000002c6SDEAY,1,2020-03-16,
3,4,a0A3h000002c6SeEAI,1,2020-03-16,
4,5,a0A3h000002c6SEEAY,1,2020-03-16,
5,6,a0A3h000002c6SFEAY,1,2020-03-16,
6,7,a0A3h000002c6SGEAY,1,2020-03-16,
7,8,a0A3h000002c6SHEAY,1,2020-03-16,
8,9,a0A3h000002c6SIEAY,1,2020-03-16,
9,10,a0A3h000002c6SJEAY,1,2020-03-16,


In [None]:
query=sf.query("SELECT ID_Class__c from Class__c")

In [None]:
student_dict={}
x=0
while x< len(query['records']):
    url=query['records'][x]['attributes']['url'].split('/')[6]
    student_id=int(query['records'][x]['ID_Student__c'])
    print(url,student_id)
    student_dict[student_id]=url
    x=x+1
print(student_dict)

### Convert Dataframe to list of dictionaries

In [17]:
participant_load = participant_data.to_dict('records')
participant_load

[{'Name': 1,
  'ID_Student__c': 'a0A3h000002c6SBEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 2,
  'ID_Student__c': 'a0A3h000002c6SCEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 3,
  'ID_Student__c': 'a0A3h000002c6SDEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 4,
  'ID_Student__c': 'a0A3h000002c6SeEAI',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 5,
  'ID_Student__c': 'a0A3h000002c6SEEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 6,
  'ID_Student__c': 'a0A3h000002c6SFEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 7,
  'ID_Student__c': 'a0A3h000002c6SGEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020-03-16',
  'EndDate__c': None},
 {'Name': 8,
  'ID_Student__c': 'a0A3h000002c6SHEAY',
  'ID_Class__c': 1,
  'StartDate__c': '2020

### Inserting rows in bulk

In [18]:
try:
    sf.bulk.ClassParticipant__c.insert(participant_load)
except Exception as e:
    print(e)

# Course

In [19]:
course_data = pd.read_sql("SELECT * FROM course", conn)
course_data.head(30)

Unnamed: 0,ID_Course,CourseCode,CourseName,CreditHours,BootCampCourse
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1
2,3,BC-UIUX,User Interface/User Experience,12,1
3,4,CIS-349,Introduction to Databases,5,0
4,5,CIS-405,Database Programming,5,0
5,6,CIS-438,Database Administration,5,0
6,7,CIS-430,Business Systems Programming I,5,0
7,8,CIS-435,Business Systems Programming II,5,0


In [None]:
course_data=course_data.rename(columns={"ID_Course":"ID_Course__c","CourseCode":"CourseCode__c","CourseName":"Name",
                                       "CreditHours":"CreditHours__c","BootCampCourse":"BootCampCourse__c"})
course_data.head()

In [None]:
course_load = course_data.to_dict('records')
course_load

In [None]:
try:
    sf.bulk.Course__c.insert(course_load)
except Exception as e:
    print(e)

# Class

In [None]:
class_data = pd.read_sql("SELECT * FROM class", conn)
class_data.head(5)

In [None]:
class_data=class_data.rename(columns={"ID_Class":"ID_Class__c","ID_Course":"ID_Course__c","Section":"Name",
                                      "StartDate":"StartDate__c","EndDate":"EndDate__c"})

class_data.head(5)

In [None]:
class_data["StartDate__c"]=class_data["StartDate__c"].astype(str)
class_data["EndDate__c"]=class_data["EndDate__c"].astype(str)

In [None]:
course_query=sf.query("SELECT ID_Course__c from Course__c")

course_query

In [None]:
course_dict={}
x=0
while x< len(course_query['records']):
    url=course_query['records'][x]['attributes']['url'].split('/')[6]
    course_id=int(course_query['records'][x]['ID_Course__c'])
    print(url,course_id)
    course_dict[course_id]=url
    x=x+1
print(course_dict)

In [None]:
for index,row in class_data.iterrows():
    class_data['ID_Course__c'][index]=course_dict[row['ID_Course__c']]
class_data

In [None]:
participant_query=sf.query("SELECT ID_Class__c from ClassParticipant__c")

participant_query

In [None]:
participant_dict={}
x=0
while x< len(participant_query['records']):
    url=participant_query['records'][x]['attributes']['url'].split('/')[6]
    participant_id=int(participant_query['records'][x]['ID_Class__c'])
    print(url,participant_id)
    participant_dict[participant_id]=url
    x=x+1
print(participant_dict)

In [None]:
for index,row in class_data.iterrows():
    try:
        class_data['ID_Class__c'][index]=participant_dict[row['ID_Class__c']]
    except KeyError as e:
        print(e)
class_data

In [None]:
class_load = class_data.to_dict('records')
class_load

In [None]:
try:
    sf.bulk.Class__c.insert(class_load)
except Exception as e:
    print(e)