<a href="https://colab.research.google.com/github/jmohsbeck1/jpmc_mle/blob/module2/JPMC_Office_Hours_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openml

import openml
import pandas as pd
import sqlite3

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Fetch dataset 40945 from OpenML.
titanic_data = openml.datasets.get_dataset(40945)

# Separate the feature table (X) from the dataset's default target column (y).
X, y, _, _ = titanic_data.get_data(
    target=titanic_data.default_target_attribute
)

# Every declared attribute except the default target counts as a feature.
attribute_names = titanic_data.features.values()
feature_names = [
    attribute.name
    for attribute in attribute_names
    if attribute.name != titanic_data.default_target_attribute
]

# One frame holding the features plus the target under the name 'Survived'.
titanic_df = pd.DataFrame(X, columns=feature_names).assign(Survived=y)

In [None]:
# Open a SQLite database that lives in memory (':memory:').
conn = sqlite3.connect(":memory:")

# Store the frame as table 'titanic', replacing any existing table of that
# name and leaving the DataFrame index out of the table.
titanic_df.to_sql(name="titanic", con=conn, index=False, if_exists="replace")

1309

In [None]:
# Preview the first ten rows of the 'titanic' table as a DataFrame.
query = "SELECT * FROM titanic LIMIT 10;"
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,Survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,,"St Louis, MO",1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON",1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON",0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",0
5,1,"Anderson, Mr. Harry",male,48.0,0,0,19952,26.55,E12,S,3,,"New York, NY",1
6,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S,10,,"Hudson, NY",1
7,1,"Andrews, Mr. Thomas Jr",male,39.0,0,0,112050,0.0,A36,S,,,"Belfast, NI",0
8,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53.0,2,0,11769,51.4792,C101,S,D,,"Bayside, Queens, NY",1
9,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C,,22.0,"Montevideo, Uruguay",0


In [None]:
# Show the type of object that read_sql_query returns for the preview query.
type(pd.read_sql_query(sql=query, con=conn))

pandas.core.frame.DataFrame

In [None]:
# List the column labels of the combined feature + 'Survived' frame.
print(titanic_df.columns)

Index(['pclass', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare',
       'cabin', 'embarked', 'boat', 'body', 'home.dest', 'Survived'],
      dtype='object')


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# --- Preprocessing definition -------------------------------------------------
# Numeric columns: fill gaps with the column median, then standardise.
numeric_features = ['age', 'fare']
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical columns: fill gaps with the literal 'missing', then one-hot
# encode (handle_unknown='ignore' for categories not seen while fitting).
categorical_features = ['pclass', 'sex', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Route each column group through its own pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# NOTE(review): the preprocessor is fitted on every row of X here, i.e. before
# the train/test split done in the following cell — confirm this is intended.
X_preprocessed = preprocessor.fit_transform(X)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_preprocessed,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit a logistic-regression classifier on the training portion.
model = LogisticRegression(max_iter=1000, solver='lbfgs')
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Share of held-out rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test set accuracy: {accuracy:.2f}")

Test set accuracy: 0.77


In [None]:
!pip install ipywidgets


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import ipywidgets as widgets
from IPython.display import display

def predict_survival(pclass, sex, age, fare, embarked):
    """Print whether a passenger with the given attributes is predicted to survive.

    Builds a one-row DataFrame from the arguments, transforms it with the
    notebook-level ``preprocessor`` and classifies it with the notebook-level
    ``model``.

    Parameters
    ----------
    pclass, sex, age, fare, embarked
        Values for the columns of the same names.

    Returns
    -------
    None
        The verdict is printed, not returned.
    """
    # Create a DataFrame for the input; the column labels are the ones the
    # preprocessor selects by name.
    input_data = pd.DataFrame(
        [[pclass, sex, age, fare, embarked]],
        columns=['pclass', 'sex', 'age', 'fare', 'embarked']
    )

    # Preprocess the input with the already-fitted transformer
    input_preprocessed = preprocessor.transform(input_data)

    # Make a prediction
    prediction = model.predict(input_preprocessed)

    # A classifier returns labels of the same type it was trained on.
    # NOTE(review): the target is an OpenML nominal attribute and may be loaded
    # as the strings '0'/'1' (check `model.classes_`); `'1' == 1` is always
    # False, so compare numerically instead of against the int literal.
    try:
        survived = float(prediction[0]) == 1.0
    except (TypeError, ValueError):
        # Non-numeric label: keep the previous behaviour (not equal to 1).
        survived = False

    if survived:
        print("The passenger is predicted to survive.")
    else:
        print("The passenger is predicted to perish.")

# Input controls, one per argument of predict_survival.
pclass_widget = widgets.IntSlider(
    description='Pclass:', min=1, max=3, step=1, value=1
)
sex_widget = widgets.Dropdown(
    description='Sex:', options=['male', 'female'], value='male'
)
age_widget = widgets.FloatSlider(
    description='Age:', min=0, max=100, step=1, value=30
)
fare_widget = widgets.FloatSlider(
    description='Fare:', min=0, max=600, step=1, value=50
)
embarked_widget = widgets.Dropdown(
    description='Embarked:', options=['C', 'Q', 'S'], value='S'
)

# Show the controls and bind each one to the matching keyword argument of
# predict_survival.
widgets.interact(
    predict_survival,
    pclass=pclass_widget,
    sex=sex_widget,
    age=age_widget,
    fare=fare_widget,
    embarked=embarked_widget,
)

interactive(children=(IntSlider(value=1, description='Pclass:', max=3, min=1), Dropdown(description='Sex:', op…

<function __main__.predict_survival(pclass, sex, age, fare, embarked)>

In [None]:
import ipywidgets as widgets
from IPython.display import display

def execute_query(query):
    """Run ``query`` on the notebook's SQLite connection ``conn``.

    Returns whatever ``pd.read_sql_query`` produces for the statement.
    """
    return pd.read_sql_query(query, conn)

def on_click(_):
    """Button callback: run the SQL currently typed into ``query_input``.

    The callback argument is ignored. The query result is rendered with
    ``display``; any exception raised while running or rendering it is
    reported on stdout as ``Error: <message>`` instead of propagating.
    """
    sql_text = query_input.value
    try:
        display(execute_query(sql_text))
    except Exception as err:
        print(f"Error: {err}")

# Free-text box for the SQL statement.
query_input = widgets.Textarea(
    layout=widgets.Layout(width="80%", height="100px"),
    placeholder="Enter your SQL query here...",
)

# Button that runs on_click each time it is pressed.
query_button = widgets.Button(
    button_style="info",
    description="Execute Query",
)
query_button.on_click(on_click)

# Render the text box followed by the button.
display(query_input, query_button)

Textarea(value='', layout=Layout(height='100px', width='80%'), placeholder='Enter your SQL query here...')

Button(button_style='info', description='Execute Query', style=ButtonStyle())

In [None]:
def predict_survival(pclass, sex, age, fare, embarked):
    """Return a sentence saying whether the described passenger is predicted to survive.

    Builds a one-row DataFrame from the arguments, transforms it with the
    notebook-level ``preprocessor`` and classifies it with the notebook-level
    ``model``.

    Parameters
    ----------
    pclass, sex, age, fare, embarked
        Values for the columns of the same names.

    Returns
    -------
    str
        ``"The passenger is predicted to survive."`` or
        ``"The passenger is predicted to perish."``.
    """
    # Create a DataFrame for the input; the column labels are the ones the
    # preprocessor selects by name.
    input_data = pd.DataFrame(
        [[pclass, sex, age, fare, embarked]],
        columns=['pclass', 'sex', 'age', 'fare', 'embarked']
    )

    # Preprocess the input with the already-fitted transformer
    input_preprocessed = preprocessor.transform(input_data)

    # Make a prediction
    prediction = model.predict(input_preprocessed)

    # A classifier returns labels of the same type it was trained on.
    # NOTE(review): the target is an OpenML nominal attribute and may be loaded
    # as the strings '0'/'1' (check `model.classes_`); `'1' == 1` is always
    # False, so compare numerically instead of against the int literal.
    try:
        survived = float(prediction[0]) == 1.0
    except (TypeError, ValueError):
        # Non-numeric label: keep the previous behaviour (not equal to 1).
        survived = False

    if survived:
        return "The passenger is predicted to survive."
    return "The passenger is predicted to perish."

# Create widgets for user input
pclass_widget = widgets.IntSlider(min=1, max=3, step=1, value=1, description='Pclass:')
sex_widget = widgets.Dropdown(options=['male', 'female'], value='male', description='Sex:')
age_widget = widgets.FloatSlider(min=0, max=100, step=1, value=30, description='Age:')
fare_widget = widgets.FloatSlider(min=0, max=600, step=1, value=50, description='Fare:')
embarked_widget = widgets.Dropdown(options=['C', 'Q', 'S'], value='S', description='Embarked:')


def _show_prediction(pclass, sex, age, fare, embarked):
    """Print the sentence returned by predict_survival for the given inputs."""
    print(predict_survival(pclass, sex, age, fare, embarked))


# interactive_output captures only what the callback prints or displays; a
# returned value is discarded. predict_survival returns its sentence, so it is
# wrapped in a printing callback — otherwise the output area stays empty.
interactive_output = widgets.interactive_output(_show_prediction, {
    'pclass': pclass_widget,
    'sex': sex_widget,
    'age': age_widget,
    'fare': fare_widget,
    'embarked': embarked_widget
})

# Controls in one row, captured prediction text underneath.
display(widgets.VBox([widgets.HBox([pclass_widget, sex_widget, age_widget, fare_widget, embarked_widget]), interactive_output]))


VBox(children=(HBox(children=(IntSlider(value=1, description='Pclass:', max=3, min=1), Dropdown(description='S…

In [None]:
def on_predict_click(_):
    """Button callback: predict from the current widget values.

    The callback argument is ignored. The current values of the five input
    widgets are passed to ``predict_survival`` and the text it returns is
    written to ``prediction_label.value``.
    """
    prediction_label.value = predict_survival(
        pclass_widget.value,
        sex_widget.value,
        age_widget.value,
        fare_widget.value,
        embarked_widget.value,
    )

# Input controls, one per argument of predict_survival.
pclass_widget = widgets.IntSlider(
    description='Pclass:', min=1, max=3, step=1, value=1
)
sex_widget = widgets.Dropdown(
    description='Sex:', options=['male', 'female'], value='male'
)
age_widget = widgets.FloatSlider(
    description='Age:', min=0, max=100, step=1, value=30
)
fare_widget = widgets.FloatSlider(
    description='Fare:', min=0, max=600, step=1, value=50
)
embarked_widget = widgets.Dropdown(
    description='Embarked:', options=['C', 'Q', 'S'], value='S'
)

# Button that triggers on_predict_click when pressed.
predict_button = widgets.Button(button_style="info", description="Predict")
predict_button.on_click(on_predict_click)

# Label that on_predict_click fills with the prediction text.
prediction_label = widgets.Label()

# Layout: controls in one row, then the button, then the prediction label.
widgets.VBox(
    [
        widgets.HBox(
            [pclass_widget, sex_widget, age_widget, fare_widget, embarked_widget]
        ),
        predict_button,
        prediction_label,
    ]
)


VBox(children=(HBox(children=(IntSlider(value=1, description='Pclass:', max=3, min=1), Dropdown(description='S…