## **Setup the Environment**

In [1]:
# Run below commands to install libraries
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://archive.apache.org/dist/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz
!tar xf spark-3.1.2-bin-hadoop3.2.tgz
!pip install -q findspark
!pip install pyarrow
!pip install python-snappy
!pip install emojis
!pip install nltk
!pip install demoji
!pip install emoji
!pip install fastparquet
!pip install pyspark

Collecting python-snappy
  Downloading python_snappy-0.6.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (56 kB)
[K     |████████████████████████████████| 56 kB 2.3 MB/s 
[?25hInstalling collected packages: python-snappy
Successfully installed python-snappy-0.6.1
Collecting emojis
  Downloading emojis-0.6.0-py3-none-any.whl (27 kB)
Installing collected packages: emojis
Successfully installed emojis-0.6.0
Collecting demoji
  Downloading demoji-1.1.0-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 749 kB/s 
[?25hInstalling collected packages: demoji
Successfully installed demoji-1.1.0
Collecting emoji
  Downloading emoji-1.7.0.tar.gz (175 kB)
[K     |████████████████████████████████| 175 kB 5.0 MB/s 
[?25hBuilding wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-1.7.0-py3-none-any.whl size=171046 sha256=f448d49d2c9f68f149b1532f880c217edc66637bd2f3c239f5de1cd

In [2]:
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-3.1.2-bin-hadoop3.2"

In [3]:
# Import required packages
import findspark
findspark.init()
import emoji
import emojis
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer, PorterStemmer
from pyspark.sql import SparkSession
from pyspark import SparkConf, SparkContext
from datetime import datetime, date, timedelta
from dateutil import relativedelta
from pyspark.sql import SQLContext, Row
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql import DataFrame
from pyspark.sql.functions import *
from pyspark.sql import functions as F  
from pyspark.sql.functions import collect_list, collect_set, concat, first, array_distinct, col, size, expr
from pyspark.sql.functions import lag, lead
from pyspark.sql import DataFrame
from pyspark.sql.functions import array, col, explode, lit, struct
from typing import Iterable 
import random
import pandas as pd
from pyspark.sql.window import Window
import pyarrow.parquet as pq
import pyarrow
from collections import Counter
from typing import Iterable
import matplotlib.pyplot as plt
import seaborn as sns
from fastparquet import ParquetFile
import snappy
from pyspark.shell import spark
from nltk.stem import PorterStemmer
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /__ / .__/\_,_/_/ /_/\_\   version 3.1.2
      /_/

Using Python version 3.7.13 (default, Apr 24 2022 01:04:09)
Spark context Web UI available at http://00b5a4c286fc:4040
Spark context available as 'sc' (master = local[*], app id = local-1652501898241).
SparkSession available as 'spark'.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [4]:
from fastparquet import ParquetFile
import snappy

venmo_data = spark\
  .read\
  .option("inferSchema", "true")\
  .option("header", "true")\
  .parquet("/content/VenmoSample.snappy.parquet")

venmo_data.show(5)

+-------+-------+----------------+-------------------+------------+-----------+--------------------+
|  user1|  user2|transaction_type|           datetime| description|is_business|            story_id|
+-------+-------+----------------+-------------------+------------+-----------+--------------------+
|1218774|1528945|         payment|2015-11-27 10:48:19|        Uber|      false|5657c473cd03c9af2...|
|5109483|4782303|         payment|2015-06-17 11:37:04|      Costco|      false|5580f9702b64f70ab...|
|4322148|3392963|         payment|2015-06-19 07:05:31|Sweaty balls|      false|55835ccb1a624b14a...|
| 469894|1333620|          charge|2016-06-03 23:34:13|          🎥|      false|5751b185cd03c9af2...|
|2960727|3442373|         payment|2016-05-29 23:23:42|           ⚡|      false|574b178ecd03c9af2...|
+-------+-------+----------------+-------------------+------------+-----------+--------------------+
only showing top 5 rows



## **Text Analytics**

**Task_1: Use the text dictionary and the emoji dictionary to classify Venmo’s transactions in the sample dataset.**

In [5]:
venmo_word_dict = spark\
  .read\
  .option("inferSchema", "true")\
  .option("header", "true")\
  .csv("/content/Venmo Word Classification Dictonary BAX-423 - Word_Dict.csv")

venmo_word_dict.show(5)

+----------+---------+---------+--------+------+--------------+-----------+-----+---------------+
|    People|     Food|    Event|Activity|Travel|Transportation|    Utility| Cash|Illegal/Sarcasm|
+----------+---------+---------+--------+------+--------------+-----------+-----+---------------+
|    friend|     food| birthday|    ball| beach|          lyft|       bill| atm |      addiction|
|friendship|      bbq|christmas|    boat| place|          uber|      cable|bank |           drug|
|      baby|     bean|    happy|     bar|    la|           cab|        fee|cash |          wangs|
|       boy|    latte|     bday|    book| world|           bus|   electric|money|           weed|
|      girl|breakfast|  wedding|    club| hotel|           car|electricity| buck|           anal|
+----------+---------+---------+--------+------+--------------+-----------+-----+---------------+
only showing top 5 rows



In [6]:
venmo_emoji_dict = spark\
  .read\
  .option("inferSchema", "true")\
  .option("header", "true")\
  .csv("/content/Venmo_Emoji_Classification_Dictionary.csv")

venmo_emoji_dict.show(5)

+-----+------+----+--------+--------------+------+-------+
|Event|Travel|Food|Activity|Transportation|People|Utility|
+-----+------+----+--------+--------------+------+-------+
| 🇦🇺|    🏔|  🍇|      👾|            🚄|    😀|      ⚡|
| 🇫🇷|     ⛰|  🍈|      🕴|            🚅|    😃|     💡|
|   🎂|    🌋|  🍉|      🎪|            🚆|    😄|     🔌|
|   🛍|    🗻|  🍊|      🎭|            🚇|    😁|     📺|
| 🇨🇦|    🏕|  🍋|      🎨|            🚈|    😆|     🔌|
+-----+------+----+--------+--------------+------+-------+
only showing top 5 rows



In [7]:
# store each column of the word dictionary into lists

word_people = list(venmo_word_dict.select('People').dropna(how='all').toPandas()['People'])
word_food = list(venmo_word_dict.select('Food').dropna(how='all').toPandas()['Food'])
word_event = list(venmo_word_dict.select('Event').dropna(how='all').toPandas()['Event'])
word_activity = list(venmo_word_dict.select('Activity').dropna(how='all').toPandas()['Activity'])
word_travel = list(venmo_word_dict.select('Travel').dropna(how='all').toPandas()['Travel'])
word_transportation = list(venmo_word_dict.select('Transportation').dropna(how='all').toPandas()['Transportation'])
word_utility = list(venmo_word_dict.select('Utility').dropna(how='all').toPandas()['Utility'])
word_cash = list(venmo_word_dict.select('Cash').dropna(how='all').toPandas()['Cash'])
word_illegal = list(venmo_word_dict.select('Illegal/Sarcasm').dropna(how='all').toPandas()['Illegal/Sarcasm'])

In [8]:
# store each column of the emoji dictionary into lists

emoji_event = list(venmo_emoji_dict.select('Event').dropna(how='all').toPandas()['Event'].dropna(how='all'))
emoji_travel = list(venmo_emoji_dict.select('Travel').dropna(how='all').toPandas()['Travel'])
emoji_food = list(venmo_emoji_dict.select('Food').dropna(how='all').toPandas()['Food'])
emoji_activity = list(venmo_emoji_dict.select('Activity').dropna(how='all').toPandas()['Activity'])
emoji_transportation = list(venmo_emoji_dict.select('Transportation').dropna(how='all').toPandas()['Transportation'])
emoji_people = list(venmo_emoji_dict.select('People').dropna(how='all').toPandas()['People'])
emoji_utility = list(venmo_emoji_dict.select('Utility').dropna(how='all').toPandas()['Utility'])

In [9]:
import demoji
demoji.download_codes()

# convert emoji into text and strip unnecessary punctuations
@udf
def emoji_to_text(emojis):
  emoji_text = " ".join(emoji.demojize(emojis).replace("_", "").replace(":", " ").split())
  return emoji_text

  


In [10]:
# remove punctuations from string
@udf
def lower_clean_str(word):
  punc='!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
  lowercased_str = word.lower()
  for ch in punc:
    lowercased_str = lowercased_str.replace(ch, '')
  return lowercased_str

In [11]:
# text tokenization
@udf
def identify_tokens(word):
    tokens = nltk.word_tokenize(word)
    token_words = [w for w in tokens if w.isalpha()]
    return token_words

In [12]:
# text stemming
@udf
def stemming(word):
    stemming = PorterStemmer()
    stemmed_list = [stemming.stem(word) for word in word]
    return stemmed_list

In [13]:
# define classifications
@udf
def classify(word):
  for element in word:
    if element in word_people:
      return "People"
    elif element in word_food:
      return "Food"
    elif element in word_event:
      return "Event"
    elif element in word_activity:
      return "Activity"
    elif element in word_travel:
      return "Travel"
    elif element in word_transportation:
      return "Transportation"
    elif element in word_utility:
      return "Utility"
    elif element in word_cash:
      return "Cash"
    elif element in word_illegal:
      return "Illegal/Sarcasm"
    else:
      return "Unknown Category"

In [14]:
venmo_data = venmo_data.withColumn("emoji_text",emoji_to_text(col("description")))
venmo_data = venmo_data.withColumn("clean_punct",lower_clean_str(col("emoji_text")))
venmo_data = venmo_data.withColumn("clean_text",identify_tokens(col("clean_punct")))
venmo_data = venmo_data.withColumn("clean_text",stemming(col("clean_text")))
venmo_data = venmo_data.withColumn("classification",classify(col("clean_text")))
venmo_data = venmo_data.drop('emoji_text', 'clean_punct')

venmo_data.show()

+--------+-------+----------------+-------------------+--------------------+-----------+--------------------+--------------------+----------------+
|   user1|  user2|transaction_type|           datetime|         description|is_business|            story_id|          clean_text|  classification|
+--------+-------+----------------+-------------------+--------------------+-----------+--------------------+--------------------+----------------+
| 1218774|1528945|         payment|2015-11-27 10:48:19|                Uber|      false|5657c473cd03c9af2...|              [uber]|  Transportation|
| 5109483|4782303|         payment|2015-06-17 11:37:04|              Costco|      false|5580f9702b64f70ab...|            [costco]|            Food|
| 4322148|3392963|         payment|2015-06-19 07:05:31|        Sweaty balls|      false|55835ccb1a624b14a...|      [sweati, ball]|Unknown Category|
|  469894|1333620|          charge|2016-06-03 23:34:13|                  🎥|      false|5751b185cd03c9af2...|    

**Task_2: Compute the percent of emoji only transactions. Compute the top 5 most popular emoji and the top 3 most popular emoji categories?**

In [15]:
# check if only emoji
@udf
def check_emoji(descriptions):
  if len(descriptions) == emojis.count(descriptions):
    return 1
  else: return 0

venmo_data = venmo_data.withColumn("emoji",check_emoji(col("description")))
venmo_data.show(5)

+-------+-------+----------------+-------------------+------------+-----------+--------------------+--------------+----------------+-----+
|  user1|  user2|transaction_type|           datetime| description|is_business|            story_id|    clean_text|  classification|emoji|
+-------+-------+----------------+-------------------+------------+-----------+--------------------+--------------+----------------+-----+
|1218774|1528945|         payment|2015-11-27 10:48:19|        Uber|      false|5657c473cd03c9af2...|        [uber]|  Transportation|    0|
|5109483|4782303|         payment|2015-06-17 11:37:04|      Costco|      false|5580f9702b64f70ab...|      [costco]|            Food|    0|
|4322148|3392963|         payment|2015-06-19 07:05:31|Sweaty balls|      false|55835ccb1a624b14a...|[sweati, ball]|Unknown Category|    0|
| 469894|1333620|          charge|2016-06-03 23:34:13|          🎥|      false|5751b185cd03c9af2...| [moviecamera]|Unknown Category|    1|
|2960727|3442373|         pa

In [None]:
# find out percentage of emoji only transactions
venmo_data.select(sum("emoji")/venmo_data.count()).show()

+----------------------+
|(sum(emoji) / 7113137)|
+----------------------+
|   0.20637223773420926|
+----------------------+



In [16]:
# filter the descriptions for emoji only transactions
emojis_only = venmo_data.filter("emoji == '1'")
emojis_only.createOrReplaceTempView("emojis_only")
emojis_only.show(5)

+--------+-------+----------------+-------------------+-----------+-----------+--------------------+---------------+----------------+-----+
|   user1|  user2|transaction_type|           datetime|description|is_business|            story_id|     clean_text|  classification|emoji|
+--------+-------+----------------+-------------------+-----------+-----------+--------------------+---------------+----------------+-----+
|  469894|1333620|          charge|2016-06-03 23:34:13|         🎥|      false|5751b185cd03c9af2...|  [moviecamera]|Unknown Category|    1|
| 2960727|3442373|         payment|2016-05-29 23:23:42|          ⚡|      false|574b178ecd03c9af2...|   [highvoltag]|Unknown Category|    1|
| 5317324|3942984|         payment|2016-01-04 09:11:25|         👠|      false|5689c6bdcd03c9af2...|[highheeledsho]|Unknown Category|    1|
| 4238868|4879587|         payment|2015-10-04 08:28:01|         🍺|      false|561080a1cd03c9af2...|      [beermug]|Unknown Category|    1|
|11719500|8702716|     

In [None]:
# find the top 5 most popular emojis
Counter(emojis_only(col("description"))).show()

In [17]:
# create a function to filter emojis into different categories
@udf
def emoji_category(emo):
  if emo in emoji_event:
    return "Event"
  elif emo in emoji_travel:
    return "Travel"
  elif emo in emoji_food:
    return "Food"
  elif emo in emoji_transportation:
    return "Transportation"
  elif emo in emoji_people:
    return "People"
  elif emo in emoji_utility:
    return "Utility"
  else:
    return "Unknown Category"

In [18]:
emojis_only = emojis_only.withColumn("classification",emoji_category(col("description")))

emojis_only.show(5)

+--------+-------+----------------+-------------------+-----------+-----------+--------------------+---------------+----------------+-----+
|   user1|  user2|transaction_type|           datetime|description|is_business|            story_id|     clean_text|  classification|emoji|
+--------+-------+----------------+-------------------+-----------+-----------+--------------------+---------------+----------------+-----+
|  469894|1333620|          charge|2016-06-03 23:34:13|         🎥|      false|5751b185cd03c9af2...|  [moviecamera]|           Event|    1|
| 2960727|3442373|         payment|2016-05-29 23:23:42|          ⚡|      false|574b178ecd03c9af2...|   [highvoltag]|         Utility|    1|
| 5317324|3942984|         payment|2016-01-04 09:11:25|         👠|      false|5689c6bdcd03c9af2...|[highheeledsho]|Unknown Category|    1|
| 4238868|4879587|         payment|2015-10-04 08:28:01|         🍺|      false|561080a1cd03c9af2...|      [beermug]|            Food|    1|
|11719500|8702716|     

In [None]:
Counter(emojis_only("classification")).most_common(3)

**Task_3: For each user, create a variable to indicate their spending behavior profile. For example, if a user has made 10 transactions, where 5 of them are food and the other 5 are activity, then the user’s spending profile will be 50% food and 50% activity.**

In [19]:
# add index column and create temporary view
from functools import reduce 
from pyspark.sql import DataFrame
from pyspark.sql import SparkSession

text_only = venmo_data.filter("emoji == '0'")
text_only.createOrReplaceTempView("text_only")

def unionAll(*dfs):
    return reduce(DataFrame.unionAll, dfs)

venmo_data = unionAll(*[text_only,emojis_only]).sort("user1")
venmo_data.createOrReplaceTempView("Venmo_data")
venmo_data.show()

+-----+-------+----------------+-------------------+--------------------+-----------+--------------------+--------------------+----------------+-----+
|user1|  user2|transaction_type|           datetime|         description|is_business|            story_id|          clean_text|  classification|emoji|
+-----+-------+----------------+-------------------+--------------------+-----------+--------------------+--------------------+----------------+-----+
|    2|    220|         payment|2012-11-23 06:03:42|Grab that couch. ...|      false|54e419e6cd03c9af2...|[grab, that, couc...|Unknown Category|    0|
|    3|7854140|         payment|2016-10-09 03:36:13|Check out this re...|      false|57f958bd23e064eac...|[check, out, thi,...|         Utility|    0|
|    3|1079020|         payment|2016-10-07 23:37:56|Good luck on your...|      false|57f7cf6423e064eac...|[good, luck, on, ...|Unknown Category|    0|
|    3|     52|         payment|2016-09-22 15:30:09|Hehe.. we need so...|      false|57e396912

In [None]:
Spending_Behavior = spark.sql("""
                    SELECT user1, classification, count(*) AS transsaction_counts,
                    CONCAT(ROUND(COUNT(*)/SUM(COUNT(*)) OVER (PARTITION BY user1) *100, 2),'%') AS ratio
                    FROM venmo_data
                    GROUP BY user1, classification
                    ORDER BY user1
                    """)
Spending_Behavior.show()

+-----+----------------+-------------------+------+
|user1|  classification|transsaction_counts| ratio|
+-----+----------------+-------------------+------+
|    2|Unknown Category|                  1|100.0%|
|    3|         Utility|                  1|16.67%|
|    3|Unknown Category|                  5|83.33%|
|    4|Unknown Category|                  3| 50.0%|
|    4|        Activity|                  1|16.67%|
|    4|            Food|                  2|33.33%|
|   10|          People|                  2| 20.0%|
|   10|Unknown Category|                  3| 30.0%|
|   10|        Activity|                  1| 10.0%|
|   10|            Food|                  4| 40.0%|
|   11|Unknown Category|                 23| 92.0%|
|   11|            Food|                  2|  8.0%|
|   12|Unknown Category|                  9|100.0%|
|   13|Unknown Category|                 19|100.0%|
|   16|Unknown Category|                  4| 40.0%|
|   16|            Food|                  4| 40.0%|
|   16|     

**Task_4: In Task_3, we got a static spending profile. However, life and social networks are evolving over time. Therefore, let’s explore how a user’s spending profile is evolving over her lifetime in Venmo. First of all, we need to analyze a user’s transactions in monthly intervals, starting from 0 (indicating their first transaction only) up to 12.**

In [20]:
# calculate transaction date difference
Transaction = spark.sql("""
                SELECT user1, user2, datetime, classification, 
                DATEDIFF(datetime,FIRST_VALUE(datetime) OVER (PARTITION BY user1 ORDER BY datetime)) AS days
                FROM venmo_data
                GROUP BY user1, user2, datetime, classification
                ORDER BY user1
                """)
Transaction.show()

+-----+-------+-------------------+----------------+----+
|user1|  user2|           datetime|  classification|days|
+-----+-------+-------------------+----------------+----+
|    2|    220|2012-11-23 06:03:42|Unknown Category|   0|
|    3|2382556|2016-10-07 08:50:23|Unknown Category|  15|
|    3|1204190|2016-10-09 01:56:24|Unknown Category|  17|
|    3|7854140|2016-10-09 03:36:13|         Utility|  17|
|    3|     52|2016-09-22 15:30:09|Unknown Category|   0|
|    3|1079020|2016-10-07 23:37:56|Unknown Category|  15|
|    3|2382556|2016-10-06 10:49:45|Unknown Category|  14|
|    4| 221578|2016-04-17 03:35:09|Unknown Category|1231|
|    4|9271982|2016-03-03 12:45:57|        Activity|1186|
|    4| 187560|2015-06-17 09:23:30|Unknown Category| 926|
|    4| 968271|2014-02-04 06:51:33|Unknown Category| 428|
|    4| 125527|2012-12-15 05:51:12|            Food|  12|
|    4| 122744|2012-12-03 03:35:53|            Food|   0|
|   10|3844713|2016-01-30 14:42:13|            Food|1161|
|   10|    255

In [21]:
# convert days into month
Transaction_month = Transaction.select("*", 
          when(Transaction.days==0, 0)\
         .when((Transaction.days>0) & (Transaction.days<=30), 1) \
         .when((Transaction.days>30) & (Transaction.days<=60), 2) \
         .when((Transaction.days>60) & (Transaction.days<=90), 3)\
         .when((Transaction.days>90) & (Transaction.days<=120), 4)\
         .when((Transaction.days>120) & (Transaction.days<=150), 5)\
         .when((Transaction.days>150) & (Transaction.days<=180), 6)\
         .when((Transaction.days>180) & (Transaction.days<=210), 7)\
         .when((Transaction.days>210) & (Transaction.days<=240), 8)\
         .when((Transaction.days>240) & (Transaction.days<=270), 9)\
         .when((Transaction.days>270) & (Transaction.days<=300), 10)\
         .when((Transaction.days>300) & (Transaction.days<=330), 11)\
         .when((Transaction.days>330) & (Transaction.days<=360), 12)\
         .otherwise(-1).alias('month'))

Transaction_month.createOrReplaceTempView("Transaction_month")
spending_profile = Transaction_month.filter("month != -1 ").sort("user1")

spending_profile.createOrReplaceTempView("spending_profile")
spending_profile.show()

+-----+-------+-------------------+----------------+----+-----+
|user1|  user2|           datetime|  classification|days|month|
+-----+-------+-------------------+----------------+----+-----+
|    2|    220|2012-11-23 06:03:42|Unknown Category|   0|    0|
|    3|2382556|2016-10-06 10:49:45|Unknown Category|  14|    1|
|    3|1204190|2016-10-09 01:56:24|Unknown Category|  17|    1|
|    3|1079020|2016-10-07 23:37:56|Unknown Category|  15|    1|
|    3|2382556|2016-10-07 08:50:23|Unknown Category|  15|    1|
|    3|     52|2016-09-22 15:30:09|Unknown Category|   0|    0|
|    3|7854140|2016-10-09 03:36:13|         Utility|  17|    1|
|    4| 125527|2012-12-15 05:51:12|            Food|  12|    1|
|    4| 122744|2012-12-03 03:35:53|            Food|   0|    0|
|   10|    255|2013-01-14 08:18:43|        Activity|  50|    2|
|   10|     43|2013-03-03 12:47:10|            Food|  98|    4|
|   10|     43|2012-12-23 11:08:45|            Food|  28|    1|
|   10|    255|2013-03-11 04:38:51|     

In [None]:
percentage = spark.sql("""
                SELECT user1, month, classification,
                CONCAT(ROUND(COUNT(*)/SUM(count(*)) OVER (PARTITION BY user1, month), 2),'%') AS percentage
                FROM spending_profile 
                GROUP BY user1, month, classification
                ORDER BY user1
                """)

percentage.createOrReplaceTempView("percentage")

percentage.show()

In [None]:
classes = sorted(percentage.select("classification").distinct().rdd.map(lambda row: row[0]).collect())
col = [when(col("classification") == clasifications, col("percentage")).otherwise(None).alias(clasifications) for clasifications in classes]
max = [max(col(clasifications)).alias(clasifications) for clasifications in classes]

dynamic_profile = (dynamic_tbl.select(col("user1"), col("month"), *col)\
            .groupBy("user1", "month").agg(*max).na.fill(0)\
            .orderBy("user1", "month"))

dynamic_profile.createOrReplaceTempView("dynamic_profile")

dynamic_profile.show(50)

In [None]:
# calculate mean, standard deviation for each time period and each class
summary_stats = dynamic_profile.summary()
dynamic_summary = summary_stats.select("*").toPandas()

In [None]:
dynamic_summary = df_dynamic_summary.reset_index()
headers = dynamic_summary.iloc[0] 
dynamic_summary = dynamic_summary[2:] 
dynamic_summary = dynamic_summary.reset_index(drop=True)
dynamic_summary.head()

In [None]:
# spending behavior plot. 
fig, ax = plt.subplots()

for key, group in df_dynamic_summary.groupby('category'):
    group.plot('time', 'mean', yerr='2std', label=key, ax=ax) 

plt.show()