The code in this notebook makes the charts for a single time period. The desired study period is set through the start_date and end_date variables.

In [3]:
import pandas as pd
import numpy as np
import altair as alt



In [4]:
# Company names as identified in the Generic BERTopic notebook.
# Each string is the exact value compared against the 'Company' column below.
top_5_list = [
    'EQUIFAX, INC.',
    'Experian Information Solutions Inc.',
    'TRANSUNION INTERMEDIATE HOLDINGS, INC.',
    'WELLS FARGO & COMPANY',
    'BANK OF AMERICA, NATIONAL ASSOCIATION',
]

In [5]:
# Load the CFPB Complaints Database export.
# low_memory=False makes pandas read the file in one pass instead of chunks.
df = pd.read_csv('complaints.csv', low_memory=False)

# Parse the received date into a real datetime column.
# NOTE(review): the format uses hyphens to match the ISO 'YYYY-MM-DD' bounds
# this notebook compares 'Date received' against; the previous '%Y/%m/%d'
# only parsed hyphenated values through lenient ISO handling in older pandas
# and raises under strict format matching. Assumes the CSV stores dates as
# 'YYYY-MM-DD' -- confirm against the downloaded file.
df['Date_received_dt'] = pd.to_datetime(df['Date received'], format='%Y-%m-%d')

In [6]:
def get_date_df(start_date, end_date, data=None):
    """Return the complaints received between two dates, both inclusive.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Bounds of the study period, e.g. '2018-01-01'. Anything accepted by
        ``pd.Timestamp`` works.
    data : pandas.DataFrame, optional
        Frame to filter. Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Rows of the frame whose received date lies in
        ``[start_date, end_date]``; the original index is preserved.
    """
    frame = df if data is None else data

    # Compare real datetimes rather than raw strings: lexicographic string
    # comparison is only correct for one exact text layout of the dates.
    if 'Date_received_dt' in frame.columns:
        received = frame['Date_received_dt']
    else:
        received = pd.to_datetime(frame['Date received'])

    lower, upper = pd.Timestamp(start_date), pd.Timestamp(end_date)
    # Series.between is inclusive on both ends by default; missing dates
    # compare as False and are therefore excluded.
    return frame[received.between(lower, upper)]

In [7]:
# Study-period bounds; get_date_df treats both ends as inclusive.
start_date, end_date = '2018-01-01', '2019-12-31'

# Complaints received within the study period; every chart below reads this.
df_date = get_date_df(start_date=start_date, end_date=end_date)


In [9]:
# Equifax: bar chart of the five most frequent issue categories.
equifax_df = df_date[df_date['Company'] == 'EQUIFAX, INC.']

# Complaint count per issue over all Equifax complaints, largest first.
equifax_issues_df = (
    equifax_df.groupby('Issue')['Issue'].count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)

# Keep only complaints that have a public consumer complaint narrative.
equifax_narrative_only_df = equifax_df.dropna(subset=['Consumer complaint narrative'])

# Complaint count per issue among the narrative-only complaints, largest first.
equifax_narrative_only_issues_df = (
    equifax_narrative_only_df.groupby('Issue')['Issue'].count()
    .reset_index(name='Count')
    .sort_values(['Count'], ascending=False)
)

# Build the title from start_date/end_date so it cannot go stale when the
# study period is changed.
equifax_title = (
    f'Top 5 Issue Categories '
    f'{pd.Timestamp(start_date).year}-{pd.Timestamp(end_date).year}'
)

equifax_bars = alt.Chart(equifax_narrative_only_issues_df.head(5)).mark_bar().encode(
    x='Count:Q',
    y=alt.Y('Issue:N', sort='-x')  # order the bars by their count
).properties(width=300, height=200, title=equifax_title).configure_legend(
    labelLimit=300, titleFontSize=18, labelFontSize=15)

equifax_bars

In [10]:
# Experian: bar chart of the five most frequent issue categories.
experian_df = df_date[df_date['Company'] == 'Experian Information Solutions Inc.']

# Complaint count per issue over all Experian complaints, largest first.
experian_issues_df = (
    experian_df.groupby('Issue')['Issue'].count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)

# Keep only complaints that have a public consumer complaint narrative.
experian_narrative_only_df = experian_df.dropna(subset=['Consumer complaint narrative'])

# Complaint count per issue among the narrative-only complaints, largest first.
experian_narrative_only_issues_df = (
    experian_narrative_only_df.groupby('Issue')['Issue'].count()
    .reset_index(name='Count')
    .sort_values(['Count'], ascending=False)
)

# Build the title from start_date/end_date so it cannot go stale when the
# study period is changed.
experian_title = (
    f'Top 5 Issue Categories '
    f'{pd.Timestamp(start_date).year}-{pd.Timestamp(end_date).year}'
)

experian_bars = alt.Chart(experian_narrative_only_issues_df.head(5)).mark_bar().encode(
    x='Count:Q',
    y=alt.Y('Issue:N', sort='-x')  # order the bars by their count
).properties(width=300, height=200, title=experian_title).configure_legend(
    labelLimit=300, titleFontSize=18, labelFontSize=15)

experian_bars

In [11]:
# TransUnion: bar chart of the five most frequent issue categories.
transunion_df = df_date[df_date['Company'] == 'TRANSUNION INTERMEDIATE HOLDINGS, INC.']

# Complaint count per issue over all TransUnion complaints, largest first.
transunion_issues_df = (
    transunion_df.groupby('Issue')['Issue'].count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)

# Keep only complaints that have a public consumer complaint narrative.
transunion_narrative_only_df = transunion_df.dropna(subset=['Consumer complaint narrative'])

# Complaint count per issue among the narrative-only complaints, largest first.
transunion_narrative_only_issues_df = (
    transunion_narrative_only_df.groupby('Issue')['Issue'].count()
    .reset_index(name='Count')
    .sort_values(['Count'], ascending=False)
)

# Build the title from start_date/end_date so it cannot go stale when the
# study period is changed.
transunion_title = (
    f'Top 5 Issue Categories '
    f'{pd.Timestamp(start_date).year}-{pd.Timestamp(end_date).year}'
)

transunion_bars = alt.Chart(transunion_narrative_only_issues_df.head(5)).mark_bar().encode(
    x='Count:Q',
    y=alt.Y('Issue:N', sort='-x')  # order the bars by their count
).properties(width=300, height=200, title=transunion_title).configure_legend(
    labelLimit=300, titleFontSize=18, labelFontSize=15)

transunion_bars

In [14]:
# Wells Fargo: bar chart of the five most frequent issue categories.
wellsfargo_df = df_date[df_date['Company'] == 'WELLS FARGO & COMPANY']

# Complaint count per issue over all Wells Fargo complaints, largest first.
wellsfargo_issues_df = (
    wellsfargo_df.groupby('Issue')['Issue'].count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)

# Keep only complaints that have a public consumer complaint narrative.
wellsfargo_narrative_only_df = wellsfargo_df.dropna(subset=['Consumer complaint narrative'])

# Complaint count per issue among the narrative-only complaints, largest first.
wellsfargo_narrative_only_issues_df = (
    wellsfargo_narrative_only_df.groupby('Issue')['Issue'].count()
    .reset_index(name='Count')
    .sort_values(['Count'], ascending=False)
)

# Build the title from start_date/end_date so it cannot go stale when the
# study period is changed.
wellsfargo_title = (
    f'Top 5 Issue Categories '
    f'{pd.Timestamp(start_date).year}-{pd.Timestamp(end_date).year}'
)

wellsfargo_bars = alt.Chart(wellsfargo_narrative_only_issues_df.head(5)).mark_bar().encode(
    # NOTE(review): the x-axis domain is fixed at 0-1600, presumably tuned for
    # the 2018-2019 counts -- revisit it when the study period changes.
    x=alt.X('Count:Q', scale=alt.Scale(domain=[0, 1600])),
    y=alt.Y('Issue:N', sort='-x')  # order the bars by their count
).properties(width=300, height=200, title=wellsfargo_title).configure_legend(
    labelLimit=600, titleFontSize=18, labelFontSize=15)

wellsfargo_bars

In [15]:
# Bank of America: bar chart of the five most frequent issue categories.
boa_df = df_date[df_date['Company'] == 'BANK OF AMERICA, NATIONAL ASSOCIATION']

# Complaint count per issue over all Bank of America complaints, largest first.
boa_issues_df = (
    boa_df.groupby('Issue')['Issue'].count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)

# Keep only complaints that have a public consumer complaint narrative.
boa_narrative_only_df = boa_df.dropna(subset=['Consumer complaint narrative'])

# Complaint count per issue among the narrative-only complaints, largest first.
boa_narrative_only_issues_df = (
    boa_narrative_only_df.groupby('Issue')['Issue'].count()
    .reset_index(name='Count')
    .sort_values(['Count'], ascending=False)
)

# Build the title from start_date/end_date so it cannot go stale when the
# study period is changed.
boa_title = (
    f'Top 5 Issue Categories '
    f'{pd.Timestamp(start_date).year}-{pd.Timestamp(end_date).year}'
)

boa_bars = alt.Chart(boa_narrative_only_issues_df.head(5)).mark_bar().encode(
    # NOTE(review): the x-axis domain is fixed at 0-1250, presumably tuned for
    # the 2018-2019 counts -- revisit it when the study period changes.
    x=alt.X('Count:Q', scale=alt.Scale(domain=[0, 1250])),
    y=alt.Y('Issue:N', sort='-x')  # order the bars by their count
).properties(width=300, height=200, title=boa_title).configure_legend(
    labelLimit=600, titleFontSize=18, labelFontSize=15)

boa_bars