In [1]:
import os
orig_working_directory=os.getcwd()
orig_working_directory

'C:\\Users\\clemi\\Desktop\\Clementine-GWU-HW\\ETL_Project'

In [2]:
os.chdir(os.path.join('..'))
curr_working_directory=os.getcwd()
curr_working_directory

'C:\\Users\\clemi\\Desktop\\Clementine-GWU-HW'

## Import Dependencies

In [3]:
import json
from sqlalchemy import create_engine
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()

## Import configuration variables

In [4]:
from salesforce_config import sf_username, sf_password, sf_security_token
from salesforce_config import remote_db_endpoint, remote_db_port
from salesforce_config import remote_db_name, remote_db_user, remote_db_pwd

## Set up simple_salesforce

In [5]:
from simple_salesforce import Salesforce
sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)

## Connect to your MySQL database

In [6]:
engine = create_engine(f"mysql://{remote_db_user}:{remote_db_pwd}@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}")
conn = engine.connect()

## Query Data

In [7]:
course_data = pd.read_sql("SELECT * FROM course", conn)
course_data.head(30)

Unnamed: 0,ID_Course,CourseCode,CourseName,CreditHours,BootCampCourse
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1
2,3,BC-UIUX,User Interface/User Experience,12,1
3,4,CIS-349,Introduction to Databases,5,0
4,5,CIS-405,Database Programming,5,0
5,6,CIS-438,Database Administration,5,0
6,7,CIS-430,Business Systems Programming I,5,0
7,8,CIS-435,Business Systems Programming II,5,0


## Rename Columns

In [8]:
course_data=course_data.rename(columns={"ID_Course":"ID_Course__c","CourseCode":"CourseCode__c","CourseName":"Name",
                                       "CreditHours":"CreditHours__c","BootCampCourse":"BootCampCourse__c"})
course_data.head()

Unnamed: 0,ID_Course__c,CourseCode__c,Name,CreditHours__c,BootCampCourse__c
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1
2,3,BC-UIUX,User Interface/User Experience,12,1
3,4,CIS-349,Introduction to Databases,5,0
4,5,CIS-405,Database Programming,5,0


## Convert DataFrame to a list of Dictionaries

In [9]:
course_load = course_data.to_dict('records')
course_load

[{'ID_Course__c': 1,
  'CourseCode__c': 'BC-DATAVIZ',
  'Name': 'Data Visualization and Analytics',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'ID_Course__c': 2,
  'CourseCode__c': 'BC-WEBDEV',
  'Name': 'Full Stack Web Development',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'ID_Course__c': 3,
  'CourseCode__c': 'BC-UIUX',
  'Name': 'User Interface/User Experience',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'ID_Course__c': 4,
  'CourseCode__c': 'CIS-349',
  'Name': 'Introduction to Databases',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'ID_Course__c': 5,
  'CourseCode__c': 'CIS-405',
  'Name': 'Database Programming',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'ID_Course__c': 6,
  'CourseCode__c': 'CIS-438',
  'Name': 'Database Administration',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'ID_Course__c': 7,
  'CourseCode__c': 'CIS-430',
  'Name': 'Business Systems Programming I',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 

## Inserting rows in bulk

In [10]:
try:
    sf.bulk.Course__c.insert(course_load)
except Exception as e:
    print(e)

# Class Data

### Query Data

In [11]:
class_data = pd.read_sql("SELECT * FROM class", conn)
class_data.head(5)

Unnamed: 0,ID_Class,ID_Course,Section,StartDate,EndDate
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07


### Rename Columns 

In [12]:
class_data=class_data.rename(columns={"ID_Class":"ID_Class__c","ID_Course":"ID_Course__c","Section":"Name",
                                      "StartDate":"StartDate__c","EndDate":"EndDate__c"})

class_data.head(5)

Unnamed: 0,ID_Class__c,ID_Course__c,Name,StartDate__c,EndDate__c
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07


In [13]:
class_data["StartDate__c"]=class_data["StartDate__c"].astype(str)
class_data["EndDate__c"]=class_data["EndDate__c"].astype(str)

### Extract Record ID 

In [14]:
course_query=sf.query("SELECT ID_Course__c from Course__c")

In [15]:
course_query

OrderedDict([('totalSize', 8),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'Course__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Course__c/a013h00000721iaAAA')])),
                            ('ID_Course__c', 1.0)]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Course__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Course__c/a013h00000721ibAAA')])),
                            ('ID_Course__c', 2.0)]),
               OrderedDict([('attributes',
                             OrderedDict([('type', 'Course__c'),
                                          ('url',
                                           '/services/data/v42.0/sobjects/Course__c/a013h00000721icAAA')])),
        

In [16]:
course_dict={}
x=0
while x< len(query['records']):
    url=query['records'][x]['attributes']['url'].split('/')[6]
    course_id=int(query['records'][x]['ID_Course__c'])
    print(url,course_id)
    course_dict[course_id]=url
    x=x+1
print(course_dict)

a013h00000721iaAAA 1
a013h00000721ibAAA 2
a013h00000721icAAA 3
a013h00000721idAAA 4
a013h00000721ieAAA 5
a013h00000721ifAAA 6
a013h00000721igAAA 7
a013h00000721ihAAA 8
{1: 'a013h00000721iaAAA', 2: 'a013h00000721ibAAA', 3: 'a013h00000721icAAA', 4: 'a013h00000721idAAA', 5: 'a013h00000721ieAAA', 6: 'a013h00000721ifAAA', 7: 'a013h00000721igAAA', 8: 'a013h00000721ihAAA'}


In [17]:
for index,row in class_data.iterrows():
    class_data['ID_Course__c'][index]=course_dict[row['ID_Course__c']]
class_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,ID_Class__c,ID_Course__c,Name,StartDate__c,EndDate__c
0,1,a013h00000721iaAAA,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14
1,2,a013h00000721iaAAA,GWDC201805DATA3,2018-05-15,2018-11-08
2,3,a013h00000721ibAAA,GWARL201905WEB3,2019-05-14,2019-11-07
3,4,a013h00000721icAAA,GWARL201905UIUX3,2019-05-14,2019-11-07


In [20]:
class_query=sf.query("SELECT ID_Class__c from Class__c")

In [21]:
class_query

OrderedDict([('totalSize', 0), ('done', True), ('records', [])])

### Convert DataFrame to a list Dictionaries 

In [18]:
class_load = class_data.to_dict('records')
class_load

[{'ID_Class__c': 1,
  'ID_Course__c': 'a013h00000721iaAAA',
  'Name': 'GWU-ARL-DATA-PT-09-0',
  'StartDate__c': '2020-03-16',
  'EndDate__c': '2020-03-14'},
 {'ID_Class__c': 2,
  'ID_Course__c': 'a013h00000721iaAAA',
  'Name': 'GWDC201805DATA3',
  'StartDate__c': '2018-05-15',
  'EndDate__c': '2018-11-08'},
 {'ID_Class__c': 3,
  'ID_Course__c': 'a013h00000721ibAAA',
  'Name': 'GWARL201905WEB3',
  'StartDate__c': '2019-05-14',
  'EndDate__c': '2019-11-07'},
 {'ID_Class__c': 4,
  'ID_Course__c': 'a013h00000721icAAA',
  'Name': 'GWARL201905UIUX3',
  'StartDate__c': '2019-05-14',
  'EndDate__c': '2019-11-07'}]

### Inserting rows in bulk 

In [19]:
try:
    sf.bulk.Class__c.insert(class_load)
except Exception as e:
    print(e)