<a href="https://colab.research.google.com/github/ipeirotis-org/datasets/blob/main/Cybersyn_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q snowflake


In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figure output.
%config InlineBackend.figure_format='retina'

In [None]:
import json
import os
from getpass import getpass

import pandas as pd
from snowflake.snowpark.session import Session

# Snowflake connection settings.
# Account and user can be overridden with the SNOWFLAKE_ACCOUNT / SNOWFLAKE_USER
# environment variables and otherwise fall back to the defaults below.
# The password is never stored in the notebook: it is read from the
# SNOWFLAKE_PASSWORD environment variable or, if that is unset or empty,
# requested interactively (getpass does not echo the typed value).
connection_parameters = {
    "account": os.environ.get("SNOWFLAKE_ACCOUNT", "cfcxbpg-ct14445"),
    "user": os.environ.get("SNOWFLAKE_USER", "ipeirotis"),
    "password": os.environ.get("SNOWFLAKE_PASSWORD") or getpass("Snowflake password: "),
    "database": "cybersyn_financial__economic_essentials",
    "schema": "cybersyn",
}

# Open the Snowpark session that the query cells use via `session.sql(...)`.
session = Session.builder.configs(connection_parameters).create()


# Plot Inflation Data

In [None]:
# Join the FRED variable/series-id crosswalk to the FRED time series and keep
# only the rows whose series id is PCEPI or CPIAUCSL, ordered by date.
sql_query = """
WITH fred_ids AS (
    SELECT VARIABLE, SERIES_ID
    FROM cybersyn_financial__economic_essentials.cybersyn.FINANCIAL_FRED_VARIABLE_SERIES_ID_CROSSWALK
),
macro_data AS (
    SELECT VARIABLE, DATE, VALUE
    FROM cybersyn_financial__economic_essentials.cybersyn.FINANCIAL_FRED_TIMESERIES
),
fred_data_ids AS (
    SELECT
        f.VARIABLE,
        f.SERIES_ID,
        m.DATE,
        m.VALUE
    FROM fred_ids f
    JOIN macro_data m ON f.VARIABLE = m.VARIABLE
)
SELECT
    VARIABLE,
    SERIES_ID,
    DATE,
    VALUE
FROM fred_data_ids
WHERE SERIES_ID IN ('PCEPI', 'CPIAUCSL')
ORDER BY DATE;
"""

# The column names are given explicitly, in the same order as the SELECT list.
result = session.sql(sql_query).collect()
df = pd.DataFrame(result, columns=["VARIABLE", "SERIES_ID", "DATE", "VALUE"])

# Coerce to plottable dtypes. Both conversions are no-ops when the values
# already have these types.
# NOTE(review): Snowflake NUMBER columns may arrive as decimal.Decimal, which
# pandas treats as non-numeric `object` data - confirm the VALUE column type.
df["DATE"] = pd.to_datetime(df["DATE"])
df["VALUE"] = df["VALUE"].astype(float)

# One column per series id, indexed by date, drawn as thin dashed lines.
# (No marker is drawn, so the former `markersize` argument had no effect.)
plot = df.pivot_table(
    index='DATE',
    columns='SERIES_ID',
    values='VALUE'
).plot(
    linestyle='--',
    linewidth=1,
    figsize=(18, 6),
    grid=True,
    title='Consumer Price Index (CPI) vs. Personal Consumption Expenditures (PCEPI)'
)

plot.set_xlabel('Date')
# Both series share this axis, so label it generically rather than 'CPI'.
# The trailing semicolon keeps the Text repr out of the cell output.
plot.set_ylabel('Index value');

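# Credit card company complaints
 Find the top 5 companies by number of card-related complaints since 2012 (subquery),
 and then count those companies' card-related complaints by month and company since 2012.
 "Card-related" means any complaint whose product name contains "card".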

In [None]:
# Step 1 (CTE): the 5 companies with the most complaints since 2012-01-01 whose
#               product name contains "card" (case-insensitive ILIKE match).
# Step 2:       monthly complaint counts for those companies, same filters.
sql = '''
WITH top5_companies AS (
  SELECT company
  FROM cybersyn.financial_cfpb_complaint
  WHERE product ILIKE '%card%'
    AND date_received >= '2012-01-01'
  GROUP BY company
  ORDER BY COUNT(*) DESC
  LIMIT 5
)
SELECT company,
       DATE_TRUNC('month', date_received) AS month,
       COUNT(*)                           AS credit_card_complaint
FROM cybersyn.financial_cfpb_complaint
WHERE product ILIKE '%card%'
  AND date_received >= '2012-01-01'
  AND company IN (SELECT company FROM top5_companies)
GROUP BY company, month;
'''

# Name the columns explicitly, in SELECT order, instead of relying on pandas
# inferring them from the returned rows.
result = session.sql(sql).collect()
df = pd.DataFrame(result, columns=["COMPANY", "MONTH", "CREDIT_CARD_COMPLAINT"])
# Use a datetime index for the time axis (no-op if MONTH is already datetime).
df["MONTH"] = pd.to_datetime(df["MONTH"])

# One line per company. A (month, company) pair that is absent from the result
# had no matching complaints, so it is plotted as 0 rather than left as a gap.
complaints_ax = df.pivot_table(
    index='MONTH',
    columns='COMPANY',
    values='CREDIT_CARD_COMPLAINT',
    fill_value=0
).plot(
    figsize=(16, 5),
    grid=True,
    title='Monthly card-related CFPB complaints: top 5 companies since 2012'
)

complaints_ax.set_xlabel('Month')
# The trailing semicolon keeps the Text repr out of the cell output.
complaints_ax.set_ylabel('Complaints per month');