# Data Engineering

*Notebook 1.1: Data Visualization Libraries*

<a href="https://github.com/gassaf2/AA1614_assaf/blob/main/Notebook1.1_DataVisualizationLibraries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas.*

*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*

In [1]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


# Load dataset

In [2]:
# Load the gapminder sample dataset that ships with Plotly Express.
# `df` is the single source of data for the dropdown options and for every
# callback defined later in this notebook; it is only read, never modified.
df = px.data.gapminder()

# Display the first few rows of the dataset to check the available columns
# (country, continent, year, lifeExp, pop, gdpPercap, iso_alpha, iso_num).
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


# Set up the Dash application

In [3]:
# Initialize the Dash app.
# `app` is the object the layout is assigned to (`app.layout`) and the one the
# `@app.callback` decorators in the following cells register against.
app = dash.Dash(__name__)

# Create the layout of the application

In [4]:
# Define the layout of the app.
#
# Component ids used by the callbacks:
#   selected-country   -> drives "line-chart" and "line-population"
#   year-range-slider  -> drives "line-population"
#   selected-continent -> drives "scatter-plot"
#
# The slider bounds and marks are derived from the years present in `df`
# instead of a fixed 1950-2020 range, so the slider never offers years for
# which the dataset has no rows (the sample rows shown above start at 1952).
# Values are cast to plain `int` because the layout must be JSON-serialisable
# and pandas returns NumPy integer types.
app.layout = html.Div([
    html.H1("Interactive Data Visualization Dashboard", style={'text-align': 'center'}),
    html.H2("Dataset:gapminder", style={'text-align': 'center'}),

    # Dropdown for selecting the country
    dcc.Dropdown(
        id="selected-country",
        options=[{'label': country, 'value': country} for country in df['country'].unique()],
        value='India',  # Default value
        multi=False,
        style={'width': '50%'}
    ),

    # Graph for visualizing life expectancy over time
    dcc.Graph(id="line-chart"),

    # Range slider restricting the years shown in the population chart
    dcc.RangeSlider(
        id="year-range-slider",
        min=int(df['year'].min()),
        max=int(df['year'].max()),
        step=1,
        marks={int(year): str(year) for year in sorted(df['year'].unique())},
        value=[1970, 2000],  # Default range
        tooltip={"placement": "bottom", "always_visible": True},
    ),

    # Graph for visualizing population over the selected year range
    dcc.Graph(id="line-population"),

    # Dropdown for selecting the continent
    dcc.Dropdown(
        id="selected-continent",
        options=[{'label': continent, 'value': continent} for continent in df['continent'].unique()],
        value='Asia',  # Default value
        multi=False,
        style={'width': '50%'}
    ),

    # Graph for visualizing GDP vs Life Expectancy
    dcc.Graph(id="scatter-plot")
])

In [5]:
# Callback to update the line chart based on selected country
@app.callback(
    Output('line-chart', 'figure'),
    [Input('selected-country', 'value')]
)
def update_line_chart(selected_country):
    """Build the life-expectancy line chart for one country.

    Parameters
    ----------
    selected_country : str or None
        Current value of the ``selected-country`` dropdown. It is ``None``
        when the user clears the dropdown.

    Returns
    -------
    A Plotly figure of ``lifeExp`` against ``year`` for the chosen country,
    or ``dash.no_update`` when no country is selected.
    """
    # A cleared dropdown would otherwise produce an empty chart titled
    # "Life Expectancy in None"; keep the previous figure instead.
    if selected_country is None:
        return dash.no_update

    # Filter the data for the selected country
    filtered_df = df[df['country'] == selected_country]

    # Create the line chart
    fig = px.line(filtered_df, x="year", y="lifeExp", title=f'Life Expectancy in {selected_country}')
    return fig

In [6]:
# Callback to update the population line chart based on selected country and range of year
@app.callback(
    Output('line-population', 'figure'),
    [Input('selected-country', 'value'),
     Input("year-range-slider", "value")]
)
def update_line_chart_pop(selected_country, selected_range):
    """Build the population line chart for one country within a year range.

    Parameters
    ----------
    selected_country : str or None
        Current value of the ``selected-country`` dropdown. It is ``None``
        when the user clears the dropdown.
    selected_range : list
        ``[start_year, end_year]`` from the ``year-range-slider``; both ends
        are inclusive.

    Returns
    -------
    A Plotly figure of ``pop`` against ``year`` for the chosen country and
    range, or ``dash.no_update`` when either input is missing.
    """
    # A cleared dropdown (or a missing slider value) would otherwise produce
    # an empty chart titled "Population in None"; keep the previous figure.
    if selected_country is None or not selected_range:
        return dash.no_update

    start_year, end_year = selected_range

    # Filter the data for the selected country and the inclusive year range
    in_country = df['country'] == selected_country
    in_range = df['year'].between(start_year, end_year)
    filtered_df = df[in_country & in_range]

    # Create the line chart
    fig = px.line(filtered_df, x="year", y="pop", title=f'Population in {selected_country}')
    return fig

In [7]:
# Callback to update the scatter plot based on selected continent
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('selected-continent', 'value')]
)
def update_scatter_plot(selected_continent):
    """Build the GDP-per-capita vs life-expectancy scatter plot for a continent.

    Parameters
    ----------
    selected_continent : str or None
        Current value of the ``selected-continent`` dropdown. It is ``None``
        when the user clears the dropdown.

    Returns
    -------
    A Plotly scatter figure (log-scaled x axis, one colour per country,
    marker size from ``pop``), or ``dash.no_update`` when no continent is
    selected.
    """
    # A cleared dropdown would otherwise produce an empty chart titled
    # "Life Expectancy and GDP in None"; keep the previous figure instead.
    if selected_continent is None:
        return dash.no_update

    # Filter the data for the selected continent
    filtered_df = df[df['continent'] == selected_continent]

    # Create the scatter plot
    fig = px.scatter(filtered_df, x='gdpPercap', y='lifeExp', color='country',
                     size='pop', hover_name='country', log_x=True,
                     title=f'Life Expectancy and GDP in {selected_continent}')
    return fig

In [8]:
# Run the Dash app
if __name__ == '__main__':
    # `app.run` is the current entry point; `app.run_server` is the legacy
    # name and is not available on the newest Dash releases. Prefer `run`
    # when it exists and fall back to `run_server` on older installations.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=False)