In [1]:
import pyspark
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.functions import concat_ws
from credentials import mysql_username, mysql_password

# Obtain the notebook's SparkSession (getOrCreate) under the app name below.
spark = (
    SparkSession.builder
    .appName('creditcard-pyspark')
    .getOrCreate()
)

In [2]:
# Read the JSON file into a DataFrame via the generic reader, then preview the rows.
df_credit = spark.read.format("json").load("cdw_sapp_credit.json")
df_credit.show()

+-----------+----------------+---------+---+-----+--------------+----------------+-----------------+----+
|BRANCH_CODE|  CREDIT_CARD_NO| CUST_SSN|DAY|MONTH|TRANSACTION_ID|TRANSACTION_TYPE|TRANSACTION_VALUE|YEAR|
+-----------+----------------+---------+---+-----+--------------+----------------+-----------------+----+
|        114|4210653349028689|123459988| 14|    2|             1|       Education|             78.9|2018|
|         35|4210653349028689|123459988| 20|    3|             2|   Entertainment|            14.24|2018|
|        160|4210653349028689|123459988|  8|    7|             3|         Grocery|             56.7|2018|
|        114|4210653349028689|123459988| 19|    4|             4|   Entertainment|            59.73|2018|
|         93|4210653349028689|123459988| 10|   10|             5|             Gas|             3.59|2018|
|        164|4210653349028689|123459988| 28|    5|             6|       Education|             6.89|2018|
|        119|4210653349028689|123459988| 19|  

In [3]:
# Inspect the loaded frame: column names, schema, summary statistics, sample rows.
# A notebook only echoes the LAST expression of a cell, so the column list has to
# be printed explicitly; as a bare first statement it was silently discarded.
print(df_credit.columns)
df_credit.printSchema()
df_credit.describe().show()
df_credit.show()

root
 |-- BRANCH_CODE: long (nullable = true)
 |-- CREDIT_CARD_NO: string (nullable = true)
 |-- CUST_SSN: long (nullable = true)
 |-- DAY: long (nullable = true)
 |-- MONTH: long (nullable = true)
 |-- TRANSACTION_ID: long (nullable = true)
 |-- TRANSACTION_TYPE: string (nullable = true)
 |-- TRANSACTION_VALUE: double (nullable = true)
 |-- YEAR: long (nullable = true)

+-------+------------------+--------------------+-------------------+-----------------+------------------+------------------+----------------+------------------+--------------------+
|summary|       BRANCH_CODE|      CREDIT_CARD_NO|           CUST_SSN|              DAY|             MONTH|    TRANSACTION_ID|TRANSACTION_TYPE| TRANSACTION_VALUE|                YEAR|
+-------+------------------+--------------------+-------------------+-----------------+------------------+------------------+----------------+------------------+--------------------+
|  count|             46694|               46694|              46694|        

In [4]:

# Add a TIMEID column: YEAR followed by MONTH and DAY, each left-padded with '0'
# to two characters and joined with an empty separator (e.g. 2018, 2, 14 -> 20180214).
# Column names are spelled exactly as they appear in the schema (YEAR / MONTH / DAY)
# and the new column uses the same spelling the later select() asks for, so this cell
# does not depend on case-insensitive column resolution being enabled.
df_credit = df_credit.withColumn(
    "TIMEID",
    F.concat_ws(
        '',
        F.col('YEAR'),
        F.lpad(F.col('MONTH'), 2, '0'),
        F.lpad(F.col('DAY'), 2, '0'),
    ),
)

In [5]:
# Narrow the frame to the seven columns listed below, in this order, then preview it.
df_credit = df_credit.select(
    'TRANSACTION_ID',
    'BRANCH_CODE',
    'CREDIT_CARD_NO',
    'TRANSACTION_TYPE',
    'TRANSACTION_VALUE',
    'CUST_SSN',
    'TIMEID',
)
df_credit.show()

+--------------+-----------+----------------+----------------+-----------------+---------+--------+
|TRANSACTION_ID|BRANCH_CODE|  CREDIT_CARD_NO|TRANSACTION_TYPE|TRANSACTION_VALUE| CUST_SSN|  TIMEID|
+--------------+-----------+----------------+----------------+-----------------+---------+--------+
|             1|        114|4210653349028689|       Education|             78.9|123459988|20180214|
|             2|         35|4210653349028689|   Entertainment|            14.24|123459988|20180320|
|             3|        160|4210653349028689|         Grocery|             56.7|123459988|20180708|
|             4|        114|4210653349028689|   Entertainment|            59.73|123459988|20180419|
|             5|         93|4210653349028689|             Gas|             3.59|123459988|20181010|
|             6|        164|4210653349028689|       Education|             6.89|123459988|20180528|
|             7|        119|4210653349028689|   Entertainment|            43.39|123459988|20180519|


In [None]:
# Write df_credit to the MySQL table named in "dbtable" through the JDBC data source,
# authenticating with the mysql_username / mysql_password values imported from
# the credentials module.
# NOTE(review): mode is "append" - re-running this cell presumably inserts the same
# rows again; confirm that is intended before executing more than once.
(
    df_credit.write
    .format("jdbc")
    .option("url", "jdbc:mysql://localhost:3306/creditcard_capstone")
    .option("dbtable", "creditcard_capstone.cdw_sapp_credit_card2")
    .option("user", mysql_username)
    .option("password", mysql_password)
    .mode("append")
    .save()
)