# 02. Exploratory Data Analysis

Importing packages for EDA.

In [77]:
# Importing packages
import pandas as pd
import numpy as np

# Importing plotting packages
import plotly.express as px
import plotly.graph_objects as go

## Reading data

In [78]:
# Read the cleaned transactions file; it is decoded as ISO-8859-1 rather than the UTF-8 default
transactions_path = "../data/clean/transactions_clean.csv"
transactions_df = pd.read_csv(transactions_path, encoding="ISO-8859-1")

In [79]:
# Peek at the first five rows to check the columns that were loaded
transactions_df.head(5)

Unnamed: 0.1,Unnamed: 0,date,transaction_amount,text,year,month,expenditure
0,0,2022-06-01,188.0,BS FITNESS WORLD A/S,2022,2022-Jun,1
1,1,2022-06-01,30.0,DK-NOTA 246 BYGST 407,2022,2022-Jun,1
2,2,2022-06-02,30.0,"DK-NOTA 283 COOR KANTINE, 407",2022,2022-Jun,1
3,3,2022-06-03,230.0,DK-NOTAZ0089 CINEMAXX FISKETORV,2022,2022-Jun,1
4,4,2022-06-03,116.93,DK-NOTAZ0174 CINEMAXX FISKETORV,2022,2022-Jun,1


In [80]:
# Drop the "Unnamed: 0" column (presumably an index that was saved into the CSV).
# Reassigning with errors="ignore" instead of inplace=True keeps this cell
# idempotent: re-running it after the column is gone no longer raises KeyError.
transactions_df = transactions_df.drop(columns=["Unnamed: 0"], errors="ignore")

# Display the first five rows
transactions_df.head()

Unnamed: 0,date,transaction_amount,text,year,month,expenditure
0,2022-06-01,188.0,BS FITNESS WORLD A/S,2022,2022-Jun,1
1,2022-06-01,30.0,DK-NOTA 246 BYGST 407,2022,2022-Jun,1
2,2022-06-02,30.0,"DK-NOTA 283 COOR KANTINE, 407",2022,2022-Jun,1
3,2022-06-03,230.0,DK-NOTAZ0089 CINEMAXX FISKETORV,2022,2022-Jun,1
4,2022-06-03,116.93,DK-NOTAZ0174 CINEMAXX FISKETORV,2022,2022-Jun,1


## Exploring data to identify KPIs for the dashboard

### Monthly Expenditures and Income

Preparing monthly expenditures and income for plotting

In [134]:
# Month labels in order of first appearance; this is the x-axis order used for plotting.
# NOTE(review): this is chronological only if the file is sorted by date - confirm.
months = transactions_df["month"].unique()

# Row masks for the two transaction types (expenditure flag: 1 = expenditure, 0 = income)
is_expenditure = transactions_df["expenditure"] == 1
is_income = transactions_df["expenditure"] == 0

# Creating separate monthly totals for expenditure and income.
# groupby() sorts its keys lexicographically (e.g. "2022-Aug" before "2022-Jun"),
# which does not match the order of `months`. Reindexing onto `months` aligns every
# total with its label and fills months without any transactions of that type with 0,
# so x and y always have the same length and order when plotted.
expenditure = (
    transactions_df[is_expenditure]
    .groupby("month")["transaction_amount"]
    .sum()
    .reindex(months, fill_value=0.0)
)
income = (
    transactions_df[is_income]
    .groupby("month")["transaction_amount"]
    .sum()
    .reindex(months, fill_value=0.0)
)

# Overall monthly averages, repeated once per month so they can be drawn as flat lines
avg_expenditure = [expenditure.sum() / len(months)] * len(months)
avg_income = [income.sum() / len(months)] * len(months)

Creating plot

In [137]:
# One colour per series, shared by the bars and the matching average line
expenditure_colour = "#8839ef"
income_colour = "#7287fd"

# Monthly totals drawn as bars
monthly_bars = [
    go.Bar(
        name="Expenditures",
        x=months,
        y=expenditure,
        marker_color=expenditure_colour,
    ),
    go.Bar(
        name="Income",
        x=months,
        y=income,
        marker_color=income_colour,
    ),
]

# Overall monthly averages drawn as dashed lines across all months
average_lines = [
    go.Scatter(
        name="Average Expenditure",
        x=months,
        y=avg_expenditure,
        mode="lines",
        line={"color": expenditure_colour, "width": 2, "dash": "dash"},
    ),
    go.Scatter(
        name="Average Income",
        x=months,
        y=avg_income,
        mode="lines",
        line={"color": income_colour, "width": 2, "dash": "dash"},
    ),
]

# Title, y-axis tick format (thousands separator, no decimals) and template
figure_layout = go.Layout(
    title=go.layout.Title(text="Monthly Expenditures and Income (DKK)"),
    yaxis={"tickformat": ",.0f"},
    template="simple_white",
)

# Assemble the figure: bars first, then the average lines
fig = go.Figure(data=monthly_bars + average_lines, layout=figure_layout)

# Show the two bar series side by side within each month
fig.update_layout(barmode="group")
fig.show()

## Ideer til andre plots

- Total på årlig basis i tabel
- Fordeling mellem kategorier
- Inkludere aktier fra Nordnet


# Dashboard layout: heading, short description, a paginated transactions table
# (10 rows per page) and the monthly expenditure/income figure `fig`.
# NOTE(review): `app`, `html`, `dash_table` and `dcc` are not defined or imported
# anywhere in the visible notebook - presumably they come from Dash; confirm and
# add the setup before running this cell.
table_columns = ["date", "text", "transaction_amount", "expenditure"]
table_records = transactions_df[table_columns].to_dict("records")

app.layout = [
    html.H1(children="Personal Finance Dashboard"),
    html.Div(
        children="This is a dashboard developed to provide a brief overview of my personal finances."
    ),
    dash_table.DataTable(data=table_records, page_size=10),
    dcc.Graph(figure=fig),
]
