In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, event
import urllib.parse
import time

In [2]:
# ---------------------------------------------------------------------------
# Configuration: everything a reader may need to tune lives in this cell.
# ---------------------------------------------------------------------------

# CSV SOURCE DATA
edu_csv_filename = 'D:/Data/IDs_educ.csv'
variable_uid = 'PPSN'            # CSV column holding the person identifier (join key)
variable_edu = 'educ2'           # CSV column holding the free-text education label
variable_edu_rank = 'edu_rank'   # name of the derived rank column

# SQL Connection Params
# Raw string: the backslash is a literal separator, and '\R' is not a valid
# Python escape sequence (it triggers a DeprecationWarning / SyntaxWarning
# in a normal string literal).
server = r'CSKMA0400\RDB_Data'
db = 'JLDJobPath'
odbc_connection_string = (
    'DRIVER={SQL Server Native Client 11.0};SERVER=' + server
    + ';DATABASE=' + db
    + ';Trusted_Connection=yes'
)

# SQL Tables
# Source TABLE
sql_table = "linkedclaims_casuals_2018m04_v2_flat_20140101_with_income"
sql_uid_var = 'ppsn'             # join-key column in the source table

# OUTPUT TABLE FOR LEFT JOIN
# Setting this equal to sql_table requests in-place output; the procedure
# cell detects that case and stages the join under a different name.
sql_table_out = sql_table + "_with_edu"

# Procedure

In [45]:
# Rank lookup, keyed on the first eight characters of the stripped label.
# The trailing space in 'Level 1 ' keeps it distinct from 'Level 10'.
EDU_RANK_BY_PREFIX = {
    'Level 1 ': 1,
    'Level 2 ': 2,
    'Level 3 ': 3,
    'Level 4 ': 4,
    'Level 5 ': 5,
    'Level 6 ': 6,
    'Level 7 ': 7,
    'Level 8 ': 8,
    'Level 9 ': 9,
    'Level 10': 10,
    # NOTE(review): replace() matches whole values, so these three entries only
    # fire for labels that are exactly 'I', 'G' or 'E' after stripping.
    # Confirm that is intended (rather than "labels starting with ...").
    'I': 3,
    'G': 3,
    'E': 3,
}

edu_raw = pd.read_csv(edu_csv_filename)

# Normalise the label to the 8-character prefix the lookup is keyed on.
edu_prefix = edu_raw[variable_edu].str.strip().str[:8]

# Build the upload frame (identifier + rank) as a fresh object and apply the
# whole lookup in a single replace(). The previous form called
# `edu_data.edu_rank.replace(..., inplace=True)` once per level, which is
# chained in-place mutation: under pandas Copy-on-Write it never reaches the
# DataFrame and the unranked labels would be uploaded. Labels without an
# entry in the lookup are left untouched, exactly as before.
edu_data = edu_raw[[variable_uid]].copy()
edu_data[variable_edu_rank] = edu_prefix.replace(EDU_RANK_BY_PREFIX)

# Load into SQL
# When the requested output table is the source table itself, the join is
# written to an underscore-prefixed staging table instead; the flag is kept
# so the final step can copy the staged result back over the source.
same_table_in_out = (sql_table == sql_table_out)
if same_table_in_out:
    sql_table_out = '_' + sql_table_out

# Scratch table that receives the uploaded CSV rows.
sql_table_temp = sql_table_out + "_temp"

## Connect to SQL
# Wall-clock start for the elapsed-time report printed at the end of the run.
mytime = time.time()

# The pyodbc dialect accepts a complete ODBC connection string through the
# `odbc_connect` query parameter; it has to be URL-encoded first.
params = urllib.parse.quote_plus(odbc_connection_string)
engine = create_engine('mssql+pyodbc:///?odbc_connect=%s' % params)

# The hand-written SQL below runs on a raw DBAPI connection and cursor.
# raw_connection() checks one out of the engine's pool directly, instead of
# opening a SQLAlchemy Connection wrapper only to discard it for its
# `.connection` attribute. conn.close() returns it to the pool.
conn = engine.raw_connection()
cursor = conn.cursor()

# Speed-up for fast execution of multiple-row statements
@event.listens_for(engine, 'before_cursor_execute')
def receive_before_cursor_execute(conn, cursor, statement, params, context, executemany):
    """Switch on ``fast_executemany`` for batched statements.

    Registered on ``engine`` for the ``before_cursor_execute`` event. When
    the pending call is an executemany, the cursor's ``fast_executemany``
    attribute is set to True; otherwise the cursor is left untouched.
    The remaining arguments are accepted because the event supplies them
    but are not used here.
    """
    if not executemany:
        return
    cursor.fast_executemany = True

def _drop_table_if_exists(table_name):
    """Drop user table ``table_name`` if it exists, then commit.

    Runs on the module-level ``cursor`` / ``conn`` opened in the connection
    step. ``table_name`` is spliced into the statement as-is, so it must be
    a trusted identifier (here it always comes from the configuration cell).
    """
    cursor.execute(
        "IF OBJECT_ID('" + table_name + "', 'U') IS NOT NULL" + '\n' +
        "DROP TABLE " + table_name
    )
    conn.commit()


# try/finally guarantees the connection is closed even when a statement
# fails; otherwise a failed run leaves an open transaction (and its locks)
# alive in the kernel until it is restarted.
try:
    ## Drop any temp table left behind by an earlier run
    _drop_table_if_exists(sql_table_temp)

    ## upload data
    print('\nUploading to Temp Table SQL: %s' % (sql_table_temp))
    edu_data.to_sql(sql_table_temp, engine, if_exists='append', index=False)

    ## Left Join
    ## Drop output table if exists (SELECT ... INTO creates it afresh)
    _drop_table_if_exists(sql_table_out)

    print('\nJoining Tables into %s' % (sql_table_out))
    # Only the rank column is taken from the uploaded table, so no helper
    # `id` column has to be created and dropped afterwards (which also failed
    # whenever the source table already had an `id` column).
    # NOTE(review): if the CSV holds several rows for one identifier, the
    # LEFT JOIN multiplies the matching source rows -- confirm the identifier
    # is unique in the CSV.
    sql_left_join = 'SELECT A.*, B.' + variable_edu_rank + ' INTO ' + sql_table_out + '\n' + \
                    ' FROM ' + sql_table + ' AS A LEFT JOIN ' + sql_table_temp + ' AS B \n' + \
                    ' ON A.' + sql_uid_var + ' = B.' + variable_uid
    cursor.execute(sql_left_join)
    conn.commit()

    ## Drop temp table
    print('\nDrop Temp Table')
    _drop_table_if_exists(sql_table_temp)

    ## rename out if required
    if same_table_in_out:
        # The join was staged under a different name; replace the source
        # table with the staged result, then remove the staging table.
        print('Copying into output table: %s' % (sql_table))
        _drop_table_if_exists(sql_table)

        sql_copy = 'SELECT * INTO ' + sql_table + ' FROM ' + sql_table_out
        cursor.execute(sql_copy)
        conn.commit()

        _drop_table_if_exists(sql_table_out)
finally:
    # Close SQL Connection
    conn.close()

elapsed_time = time.time() - mytime
print('SQL Process time: ' + time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
print('\nALL DONE')