# Data Engineering



<a href="https://colab.research.google.com/github/gassaf2/DataEngineering/blob/main/Project/GASSAF_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>




In [1]:
#importing libraries
import pandas as pd
import os
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px


The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


In [2]:
sales_data=pd.read_csv("./sample_data/sales_data.csv")
sales_data.head()

Unnamed: 0,date,product id,sales amount,store location
0,2023-09-01,P001,500,New York
1,2023-09-02,P002,300,Los Angeles
2,2023-09-03,P003,450,Chicago
3,2023-09-01,P003,824,Los Angeles
4,2023-09-02,P002,671,New York


In [3]:
# Calculate basic statistics
total_sales = sales_data["sales amount"].sum()

# Sales by location
sales_by_location = sales_data.groupby("store location")["sales amount"].sum()

# Sales by product
sales_by_product = sales_data.groupby("product id")["sales amount"].sum()

# Display the results
print(f"Total Sales Amount: {total_sales}")
print("\nSales by Location:")
print(sales_by_location)
print("\nSales by Product:")
print(sales_by_product)

Total Sales Amount: 112139

Sales by Location:
store location
Chicago        19919
Houston        28542
Los Angeles    18800
Miami          16226
New York       28652
Name: sales amount, dtype: int64

Sales by Product:
product id
P001    18835
P002    26298
P003    20829
P004    24978
P005    21199
Name: sales amount, dtype: int64


# Set up the dash application

In [4]:
# Initialize the Dash app
app = dash.Dash(__name__)

# Create the layout of the application

In [5]:
# Define the layout of the app
app.layout = html.Div([
    html.H1("Sales Dashboard", style={'text-align': 'center'}),
    html.H2("Dataset:Sales Data", style={'text-align': 'center'}),
    # Dropdown for selecting the country
  #  dcc.Dropdown(
  #      id="selected-country",
  #      options=[{'label': country, 'value': country} for country in df['country'].unique()],
  #      value='India',  # Default value
  #      multi=False,
  #      style={'width': '50%'}
  #  ),
    html.Label("Select a Category:"),
    dcc.RadioItems(
        id="category-selector",
        options=[
            {"label": "Store Location", "value": "store location"},
            {"label": "Product", "value": "product id"}
        ],
        value="store location",
        inline=True
    ),
    # Graph for visualizing life expectancy over time
    dcc.Graph(id="sales-bar-chart"),

])

In [6]:
# Callback to update the bar chart
@app.callback(
    Output("sales-bar-chart", "figure"),
    Input("category-selector", "value")
)
def update_bar_chart(category):
    # Group data by the selected category
    
    #category="store location"
    filtered_sales_data = sales_data.groupby(category)["sales amount"].sum().reset_index()

    # Create bar chart
    fig = px.bar(filtered_sales_data, x=category, y="sales amount", title=f"Sales Amount by {category}", 
             labels={"Sales Amount": "Sales Amount", "Product ID": "Product ID"},
             color_discrete_sequence=["skyblue"])

    # Show the figure
    fig.show()
    return fig

In [7]:
# Run the app
if __name__ == "__main__":
    app.run_server(debug=False)