In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the consumer complaints workbook into a DataFrame, then preview the
# first five rows to sanity-check the columns.
df = pd.read_excel(io='Consumer_Complaints.xlsx')
df.head(n=5)

Unnamed: 0,Complaint ID,Submitted via,Date submitted,Date received,State,Product,Sub-product,Issue,Sub-issue,Company public response,Company response to consumer,Timely response?
0,4848023,Referral,2021-10-24,2021-10-27,NY,Mortgage,Conventional home mortgage,Applying for a mortgage or refinancing an exis...,,Company has responded to the consumer and the ...,Closed with explanation,Yes
1,3621464,Web,2020-04-24,2020-04-24,FL,"Money transfer, virtual currency, or money ser...",Refund anticipation check,Lost or stolen check,,Company has responded to the consumer and the ...,Closed with monetary relief,Yes
2,5818349,Web,2022-07-27,2022-07-27,CA,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Account information incorrect,Company has responded to the consumer and the ...,Closed with explanation,Yes
3,7233015,Referral,2023-07-10,2023-07-11,CA,Credit card or prepaid card,General-purpose prepaid card,Problem getting a card or closing an account,"Trouble getting, activating, or registering a ...",,In progress,
4,5820224,Referral,2022-07-27,2022-07-28,VA,Credit card or prepaid card,General-purpose credit card or charge card,Closing your account,Company closed your account,Company has responded to the consumer and the ...,Closed with explanation,Yes


In [8]:
# Summarise the frame: row count plus each column's non-null count and dtype.
# NOTE(review): the saved output reports 13 columns while the loaded data has 12,
# which suggests this cell was last run after 'Status' was added — re-run the
# notebook top to bottom to confirm.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62516 entries, 0 to 62515
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   Complaint ID                  62516 non-null  int64         
 1   Submitted via                 62516 non-null  object        
 2   Date submitted                62516 non-null  datetime64[ns]
 3   Date received                 62516 non-null  datetime64[ns]
 4   State                         62516 non-null  object        
 5   Product                       62516 non-null  object        
 6   Sub-product                   62509 non-null  object        
 7   Issue                         62516 non-null  object        
 8   Sub-issue                     51658 non-null  object        
 9   Company public response       60341 non-null  object        
 10  Company response to consumer  62516 non-null  object        
 11  Timely response?            

In [4]:
# Count the missing values in every column (summing the null mask down the rows).
df.isna().sum(axis=0)

Complaint ID                        0
Submitted via                       0
Date submitted                      0
Date received                       0
State                               0
Product                             0
Sub-product                         7
Issue                               0
Sub-issue                       10858
Company public response          2175
Company response to consumer        0
Timely response?                 1494
dtype: int64

In [6]:
# Tally complaints per company-response category; these categories are what
# the Open/Closed status flag is derived from.
df.loc[:, 'Company response to consumer'].value_counts()

Company response to consumer
Closed with explanation            41044
Closed with monetary relief        14697
Closed with non-monetary relief     5273
In progress                         1494
Closed                                 8
Name: count, dtype: int64

#### <center> Section - A [Calculate top-level KPIs] </center>
Your first objective is to calculate top-level KPIs by flagging complaints as "Open" or "Closed" and creating a PivotTable to count the complaints for each status.
<br>
**Tasks**:
1. Create a Status column that flags each complaint as "Open" or "Closed" based on the value in the Company response to consumer column ("In progress" → "Open"; any other value → "Closed").
2. Create a Week start column with the date for the corresponding Monday of each Date received (for example, if the “Date received” is Wednesday, Jan 4th, then the “Week start” is Monday, Jan 2nd).
3. Extract the year, month, and day from the Week start column into separate columns.
4. Change the formula for the Month column so that it returns the text for the month name in the “mmm” format (1=“Jan”, 2=“Feb”, etc.).

In [7]:
# Flag each complaint as 'Open' while the company response is still
# 'In progress'; every other response value counts as 'Closed'.
# np.where evaluates the comparison on the whole column at once instead of
# calling a Python lambda once per row.
df['Status'] = np.where(df['Company response to consumer'] == 'In progress', 'Open', 'Closed')
# Show how many complaints fall under each status.
df['Status'].value_counts()

Status
Closed    61022
Open       1494
Name: count, dtype: int64

In [13]:
# Week Start = the Monday of the week that contains 'Date received'.
# dt.weekday is 0 for Monday through 6 for Sunday, so subtracting that many
# days lands on the Monday. normalize() first resets any time-of-day to
# midnight, so every row in the same week gets exactly the same Week Start
# value even if a timestamp carries a time component.
df['Week Start'] = (
    df['Date received'].dt.normalize()
    - pd.to_timedelta(df['Date received'].dt.weekday, unit='D')
)
df.head()

Unnamed: 0,Complaint ID,Submitted via,Date submitted,Date received,State,Product,Sub-product,Issue,Sub-issue,Company public response,Company response to consumer,Timely response?,Status,Week Start
0,4848023,Referral,2021-10-24,2021-10-27,NY,Mortgage,Conventional home mortgage,Applying for a mortgage or refinancing an exis...,,Company has responded to the consumer and the ...,Closed with explanation,Yes,Closed,2021-10-25
1,3621464,Web,2020-04-24,2020-04-24,FL,"Money transfer, virtual currency, or money ser...",Refund anticipation check,Lost or stolen check,,Company has responded to the consumer and the ...,Closed with monetary relief,Yes,Closed,2020-04-20
2,5818349,Web,2022-07-27,2022-07-27,CA,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Account information incorrect,Company has responded to the consumer and the ...,Closed with explanation,Yes,Closed,2022-07-25
3,7233015,Referral,2023-07-10,2023-07-11,CA,Credit card or prepaid card,General-purpose prepaid card,Problem getting a card or closing an account,"Trouble getting, activating, or registering a ...",,In progress,,Open,2023-07-10
4,5820224,Referral,2022-07-27,2022-07-28,VA,Credit card or prepaid card,General-purpose credit card or charge card,Closing your account,Company closed your account,Company has responded to the consumer and the ...,Closed with explanation,Yes,Closed,2022-07-25


In [14]:
# Break the Week Start date into separate numeric columns: calendar year,
# month number and day of the month.
# NOTE(review): 'WS Month' holds the month number here; the "mmm" text format
# requested in task 4 is not produced by this cell.
for column, part in (('WS Year', 'year'), ('WS Month', 'month'), ('WS Day', 'day')):
    df[column] = getattr(df['Week Start'].dt, part)
df.head()

Unnamed: 0,Complaint ID,Submitted via,Date submitted,Date received,State,Product,Sub-product,Issue,Sub-issue,Company public response,Company response to consumer,Timely response?,Status,Week Start,WS Year,WS Month,WS Day
0,4848023,Referral,2021-10-24,2021-10-27,NY,Mortgage,Conventional home mortgage,Applying for a mortgage or refinancing an exis...,,Company has responded to the consumer and the ...,Closed with explanation,Yes,Closed,2021-10-25,2021,10,25
1,3621464,Web,2020-04-24,2020-04-24,FL,"Money transfer, virtual currency, or money ser...",Refund anticipation check,Lost or stolen check,,Company has responded to the consumer and the ...,Closed with monetary relief,Yes,Closed,2020-04-20,2020,4,20
2,5818349,Web,2022-07-27,2022-07-27,CA,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Account information incorrect,Company has responded to the consumer and the ...,Closed with explanation,Yes,Closed,2022-07-25,2022,7,25
3,7233015,Referral,2023-07-10,2023-07-11,CA,Credit card or prepaid card,General-purpose prepaid card,Problem getting a card or closing an account,"Trouble getting, activating, or registering a ...",,In progress,,Open,2023-07-10,2023,7,10
4,5820224,Referral,2022-07-27,2022-07-28,VA,Credit card or prepaid card,General-purpose credit card or charge card,Closing your account,Company closed your account,Company has responded to the consumer and the ...,Closed with explanation,Yes,Closed,2022-07-25,2022,7,25


In [15]:
# Top-level KPI: number of complaints per status (Open vs. Closed).
df.loc[:, 'Status'].value_counts()

Status
Closed    61022
Open       1494
Name: count, dtype: int64