In [None]:
import sqlite3
import pandas as pd
import numpy as np

# Salary band definitions, read from the 'Salary Bands' worksheet.
# categorize_salary() reads the 'Min', 'Max' and 'Category' columns of this table.
salary_bands = pd.read_excel('task/population salary analysis.xlsx', sheet_name='Salary Bands')

# Load the whole `population` table into memory. The connection is closed in
# a `finally` clause so it is released even when the query raises.
conn = sqlite3.connect('task/population.db')
query = "SELECT * FROM population"
try:
    population_df = pd.read_sql(query, conn)
finally:
    conn.close()

def categorize_salary(salary):
    """Return the category label of the salary band that contains ``salary``.

    Bands are taken from the module-level ``salary_bands`` table and checked
    in row order. Each band is treated as the half-open interval
    ``[Min, Max)``, and the first band that matches wins.

    Args:
        salary: The salary value to classify.

    Returns:
        The ``'Category'`` value of the first matching band, or ``'Unknown'``
        when no band contains the salary.
    """
    # Lazy generator: stops scanning the bands as soon as one matches.
    matching_labels = (
        row['Category']
        for _, row in salary_bands.iterrows()
        if row['Min'] <= salary < row['Max']
    )
    return next(matching_labels, 'Unknown')

# Label every row with the salary band its 'Salary' value falls into.
salary_column = population_df['Salary']
population_df['Salary Category'] = salary_column.apply(categorize_salary)

def calculate_measures(df):
    """Summarise ``df`` per salary category.

    Args:
        df: DataFrame containing 'Salary Category' and 'Salary' columns.

    Returns:
        A DataFrame with one row per salary category and the columns
        'Percentage of Population', 'Average Salary', 'Median Salary' and
        'Population Count', in that order.
    """
    by_category = df.groupby('Salary Category')
    salaries = by_category['Salary']
    head_counts = by_category.size()

    columns = {}
    # Share of all rows in ``df`` that fall in each category, as a percentage.
    columns['Percentage of Population'] = head_counts / len(df) * 100
    columns['Average Salary'] = salaries.mean()
    columns['Median Salary'] = salaries.median()
    columns['Population Count'] = head_counts

    # The dict keys become the column labels of the combined frame.
    return pd.concat(columns, axis=1)

# Measures computed across the entire population.
overall_results = calculate_measures(population_df)

# A per-state breakdown is only possible when the table has a 'State' column.
if 'State' in population_df.columns:
    # Hand calculate_measures only the two columns it reads. Letting
    # groupby.apply operate on the grouping column itself is deprecated in
    # recent pandas releases and emits a warning; the results are the same.
    state_results = (
        population_df
        .groupby('State')[['Salary Category', 'Salary']]
        .apply(calculate_measures)
    )
    # Move the salary-category index level into the columns, leaving the
    # state as the row index.
    state_results = state_results.unstack(level=1)
else:
    # Placeholder message printed instead of a table.
    state_results = "No State information in the dataset"

print("Overall Results:")
print(overall_results)

print("\nResults by State:")
print(state_results)