<a href="https://colab.research.google.com/github/elamukilan35/Health_insurance_predictions/blob/main/health_insurance_prediction_09.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# Python packages imported by health_insurance_prediction_app.py, installed in one call.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q streamlit scikit-learn pandas numpy seaborn matplotlib plotly
# localtunnel is used further down to publish the local Streamlit server under a public URL.
# Pinned to the same version as the later explicit install so both cells agree.
!npm install localtunnel@2.0.2

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
up to date, audited 23 packages in 960ms
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 packages are looking for funding
[1G[0K⠦[1G[0K  run `npm fund` for details
[1G[0K⠦[1G[0K
2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [30]:
%%writefile health_insurance_prediction_app.py
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PolynomialFeatures

# Title of the App
st.title("Health Insurance Charges Prediction")

# Raw model inputs, in the single order shared by training, prediction and the
# radar chart. TARGET_COLUMN is the value the model predicts.
FEATURE_COLUMNS = ['age', 'bmi', 'children', 'smoker', 'region', 'sex']
CATEGORICAL_FEATURES = ['smoker', 'region', 'sex']
TARGET_COLUMN = 'charges'

# File uploader for users to upload their own datasets
uploaded_file = st.file_uploader("Upload your Insurance data CSV", type='csv')

# Everything below needs a dataset, so it only runs once a file has been uploaded.
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    # Stop early with a readable message instead of a KeyError further down.
    missing_columns = [c for c in FEATURE_COLUMNS + [TARGET_COLUMN] if c not in df.columns]
    if missing_columns:
        st.error(f"The uploaded CSV is missing required columns: {', '.join(missing_columns)}")
        st.stop()

    # Display dataset information
    st.write('Data preview')
    st.dataframe(df.head())

    # Check the missing values
    st.write('Missing values from the dataset:')
    st.write(df.isnull().sum())

    # Separating Numerical and Categorical columns
    numerical_columns = df.select_dtypes(include=np.number).columns
    categorical_columns = df.select_dtypes(exclude=np.number).columns

    # Impute categorical columns with the most frequent value and numerical
    # columns with the mean. A group is skipped when it is empty because an
    # imputer cannot be fitted on an empty column selection.
    if len(categorical_columns) > 0:
        imputer_cat = SimpleImputer(strategy='most_frequent')
        df[categorical_columns] = imputer_cat.fit_transform(df[categorical_columns])

    if len(numerical_columns) > 0:
        imputer_num = SimpleImputer(strategy='mean')
        df[numerical_columns] = imputer_num.fit_transform(df[numerical_columns])

    st.write('Data after handling null values')
    st.dataframe(df.head(8))

    # Label encode categorical values. One fitted encoder is kept per column so
    # the user's selections can later be encoded with exactly the same mapping.
    encoders = {}
    for column in CATEGORICAL_FEATURES:
        encoders[column] = LabelEncoder()
        df[column] = encoders[column].fit_transform(df[column])

    # Feature Engineering: Add interaction terms.
    # With interaction_only=True and include_bias=False the transformer output
    # already holds the original features followed by their pairwise products,
    # so it is used directly as the design matrix. Concatenating it onto df
    # would duplicate the original columns.
    poly = PolynomialFeatures(interaction_only=True, include_bias=False)
    x = pd.DataFrame(
        poly.fit_transform(df[FEATURE_COLUMNS]),
        columns=poly.get_feature_names_out(FEATURE_COLUMNS),
        index=df.index,
    )
    y = df[TARGET_COLUMN]

    # Train and Split
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Train Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(x_train, y_train)

    # Prediction on Test set
    y_pred = lr_model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    st.write(f"Model Test MSE: {mse:.2f}")

    # --- User Input for Prediction ---
    st.header('Predict Insurance Charges')

    # Collect user input
    age = st.number_input('Age', min_value=18, max_value=90, value=25)
    bmi = st.number_input('BMI', min_value=10.0, max_value=50.0, value=20.0)
    children = st.number_input('Number of Children', min_value=0, max_value=10, value=0)
    # Options come from the fitted encoders so every choice is a label the
    # encoder has seen. Hard-coded labels that differ from the data (for example
    # in capitalisation) would make LabelEncoder.transform raise.
    smoker = st.selectbox('Smoker', encoders['smoker'].classes_.tolist())
    region = st.selectbox('Region', encoders['region'].classes_.tolist())
    sex = st.selectbox('Sex', encoders['sex'].classes_.tolist())

    # Encode user input using the same encoders
    smoker_encoded = encoders['smoker'].transform([smoker])[0]
    region_encoded = encoders['region'].transform([region])[0]
    sex_encoded = encoders['sex'].transform([sex])[0]

    # Combine user input into a DataFrame (raw features, same order as training)
    input_data = pd.DataFrame([[age, bmi, children, smoker_encoded, region_encoded, sex_encoded]],
                              columns=FEATURE_COLUMNS)

    # Expand the raw inputs with the already-fitted transformer so the model
    # receives the same feature set, in the same order, that it was fitted on.
    input_features = pd.DataFrame(poly.transform(input_data), columns=x.columns)

    # Predict the charges using trained model
    predicted_charges = lr_model.predict(input_features)
    st.subheader(f'Predicted Insurance Charges: ${predicted_charges[0]:.2f}')

    # ---- Add Radar Chart ----
    st.header('Radar chart for Predicted Database')

    # Min and max of the six raw (encoded) features in the dataset; these line
    # up one-to-one with the six radar categories below.
    max_values = df[FEATURE_COLUMNS].max().to_numpy(dtype=float)
    min_values = df[FEATURE_COLUMNS].min().to_numpy(dtype=float)

    # A constant column has zero range; divide by 1 there to avoid a 0/0.
    value_range = max_values - min_values
    value_range[value_range == 0] = 1.0

    # Normalize input values between 0 and 1. Inputs outside the dataset's
    # range are clipped so they stay on the chart's [0, 1] radial axis.
    raw_input = input_data.to_numpy(dtype=float).flatten()
    normalized_input = np.clip((raw_input - min_values) / value_range, 0.0, 1.0)

    # Define radar chart categories (same order as FEATURE_COLUMNS)
    categories = ['Age', 'BMI', 'Children', 'Smoker', 'Region', 'Sex']

    # Define the radar chart
    fig = go.Figure()

    # Add trace for normalized user input
    fig.add_trace(go.Scatterpolar(
        r=normalized_input,
        theta=categories,
        fill='toself',
        name='User Input'
    ))

    # Add Trace for the maximum values from the datasets (1.0 after normalization)
    fig.add_trace(go.Scatterpolar(
        r=np.ones_like(normalized_input),
        theta=categories,
        fill='none',
        name='Max Dataset Value',
        line=dict(color='blue', dash='dash')
    ))

    # Update radar chart layout
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 1])
        ),
        showlegend=True
    )

    # Display the radar chart
    st.plotly_chart(fig)


Overwriting health_insurance_prediction_app.py


In [26]:
# Start the Streamlit app in the background, sending its stdout and stderr to a log file.
# `> file 2>&1` is used instead of the bash-only `&>` shorthand so the redirection
# also works when the command is run through a POSIX sh.
# The curl call then prints this machine's public IPv4 address
# (NOTE(review): presumably entered as the localtunnel access password — confirm).
!streamlit run health_insurance_prediction_app.py > /content/logs.txt 2>&1 & curl ipv4.icanhazip.com

34.125.232.217


In [27]:
# Install localtunnel pinned to version 2.0.2.
# --force makes npm proceed with its recommended protections disabled (npm prints a warning).
!npm install localtunnel@2.0.2 --force

[1mnpm[22m [33mwarn[39m [94musing --force[39m Recommended protections disabled.
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K
up to date, audited 23 packages in 1s
[1G[0K⠧[1G[0K
[1G[0K⠧[1G[0K3 packages are looking for funding
[1G[0K⠧[1G[0K  run `npm fund` for details
[1G[0K⠧[1G[0K
2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K⠧[1G[0K

In [None]:
# Open a public localtunnel URL that forwards to local port 8501
# (NOTE(review): presumably the port the Streamlit app is serving on — confirm).
# The command prints the generated URL and keeps running while the tunnel is open.
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://free-rice-reply.loca.lt
