In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd

# Load your dataset (assuming it's a CSV file)
df = pd.read_csv('your_dataset.csv')

# Assuming your dataset has columns like 'Company', 'Salary'
# Filter the dataset to include companies with valid salary values
df = df.dropna(subset=['Salary'])

# Define the Dash app
app = dash.Dash(__name__)

# Define the salary ranges (starting from 10 up to 50 with a step of 10)
salary_ranges = list(range(10, 51, 10))

# Define the layout of the Dash app
app.layout = html.Div([
    html.Label('Select Salary Range:'),
    dcc.Dropdown(
        id='salary-range-dropdown',
        options=[{'label': f'{start}-{start+9}', 'value': start} for start in salary_ranges],
        value=10  # Default value
    ),
    html.Label('Select Number of Top Companies to Display:'),
    dcc.Dropdown(
        id='top-companies-dropdown',
        options=[
            {'label': 'Top 10', 'value': 10},
            {'label': 'Top 20', 'value': 20},
            {'label': 'Top 30', 'value': 30},
            {'label': 'Top 40', 'value': 40},
            {'label': 'Top 50', 'value': 50}
        ],
        value=10  # Default value
    ),
    html.Div(id='company-results')
])

# Define callback to update displayed companies based on selected salary range and top N companies
@app.callback(
    Output('company-results', 'children'),
    [Input('salary-range-dropdown', 'value'),
     Input('top-companies-dropdown', 'value')]
)
def update_company_results(selected_range, top_n):
    if selected_range is None or top_n is None:
        return html.P('Please select a salary range and number of top companies.')

    # Filter companies within the selected salary range
    min_salary = selected_range
    max_salary = selected_range + 9
    filtered_companies = df[(df['Salary'] >= min_salary) & (df['Salary'] <= max_salary)]

    if filtered_companies.empty:
        return html.P('No companies found in this salary range.')

    # Calculate average salary for each company
    avg_salary_by_company = filtered_companies.groupby('Company')['Salary'].mean().sort_values(ascending=False)

    # Display top N companies with highest average salary
    top_companies = avg_salary_by_company.head(top_n)
    company_table = html.Table([
        html.Thead(html.Tr([html.Th('Company'), html.Th('Average Salary')])),
        html.Tbody([html.Tr([html.Td(company), html.Td(f'${avg:.2f}')]) for company, avg in top_companies.items()])
    ])

    return company_table

# Run the Dash app
if __name__ == '__main__':
    app.run_server(debug=True)


In [1]:
import pandas as pd
data = pd.read_csv("companies.csv")
high_salary= data[["Company_name", "Avg_salary"]]



In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Load your dataset (assuming it's a CSV file with columns 'Company_name' and 'Avg_salary')
data = pd.read_csv("companies.csv")
high_salary= data[["Company_name", "Avg_salary"]]
df=high_salary

# Define the Dash app
app = dash.Dash(__name__)

# Define the layout of the Dash app
app.layout = html.Div([
    html.Label('Select Number of Top Companies to Display:'),
    dcc.Dropdown(
        id='top-companies-dropdown',
        options=[
            {'label': 'Top 10', 'value': 10},
            {'label': 'Top 20', 'value': 20},
            {'label': 'Top 30', 'value': 30},
            {'label': 'Top 40', 'value': 40}
        ],
        value=10  # Default value
    ),
    dcc.Graph(id='bar-chart')
])

# Define callback to update bar chart based on selected number of top companies
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('top-companies-dropdown', 'value')]
)
def update_bar_chart(top_n):
    if top_n is None:
        return {}

    # Sort companies by average salary in descending order and select the top N companies
    top_companies = df.nlargest(top_n, 'Avg_salary')

    # Create a bar chart using Plotly Express
    fig = px.bar(top_companies, x='Company_name', y='Avg_salary', 
                 title=f'Top {top_n} Companies with Highest Average Salary',
                 labels={'Company_name': 'Company Name', 'Avg_salary': 'Average Salary ($)'})

    return fig

# Run the Dash app
if __name__ == '__main__':
    app.run_server(debug=True)


ModuleNotFoundError: No module named 'dash'

In [2]:
pip install dash


Defaulting to user installation because normal site-packages is not writeable
Collecting dash
  Downloading dash-2.16.1-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m163.6 kB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
[?25hCollecting dash-table==5.0.0
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting dash-core-components==2.0.0
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting Flask<3.1,>=1.0.4
  Downloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m239.8 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting Werkzeug<3.1
  Downloading werkzeug-3.0.2-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.8/226.8 kB[0m [31m203.0 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Coll

In [3]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px
import warnings

def fxn():
    warnings.warn("deprecated", DeprecationWarning)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fxn()
# Load your dataset (assuming it's a CSV file with columns 'Company_name' and 'Avg_salary')
data = pd.read_csv("companies.csv")
high_salary= data[["Company_name", "Avg_salary"]]

def convert_salary(salary_str):
    try:
        if 'k' in salary_str:
            return int(float(salary_str.replace('k', '')) * 1000)
        elif 'm' in salary_str:
            return int(float(salary_str.replace('m', '')) * 1000000)
        else:
            return int(salary_str)
    except (TypeError, ValueError):
        return None  # Return None for invalid or missing values

# Apply the conversion function to the 'Avg_salary' column
high_salary['Avg_salary'] = high_salary['Avg_salary'].apply(convert_salary)


df=high_salary.copy()
# Define the Dash app
app = dash.Dash(__name__)

# Define the layout of the Dash app
app.layout = html.Div([
    html.Label('Select Number of Top Companies to Display:'),
    dcc.Dropdown(
        id='top-companies-dropdown',
        options=[
            {'label': 'Top 10', 'value': 10},
            {'label': 'Top 20', 'value': 20},
            {'label': 'Top 30', 'value': 30},
            {'label': 'Top 40', 'value': 40}
        ],
        value=10  # Default value
    ),
    dcc.Graph(id='bar-chart')
])

# Define callback to update bar chart based on selected number of top companies
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('top-companies-dropdown', 'value')]
)
def update_bar_chart(top_n):
    if top_n is None:
        return {}

    # Sort companies by average salary in descending order and select the top N companies
    top_companies = df.nlargest(top_n, 'Avg_salary')

    # Create a bar chart using Plotly Express
    fig = px.bar(top_companies, x='Company_name', y='Avg_salary', 
                 title=f'Top {top_n} Companies with Highest Average Salary',
                 )

    return fig

# Run the Dash app
if __name__ == '__main__':
    app.run_server(debug=True)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

