In [1]:
import seaborn as sns

# Load the titanic dataset
attention_data = sns.load_dataset('attention')

print("Attention Data")

print(attention_data.columns) # titanic data set
display(attention_data[['subject','attention', 'solutions', 'score']]) 

Attention Data
Index(['Unnamed: 0', 'subject', 'attention', 'solutions', 'score'], dtype='object')


Unnamed: 0,subject,attention,solutions,score
0,1,divided,1,2.0
1,2,divided,1,3.0
2,3,divided,1,3.0
3,4,divided,1,5.0
4,5,divided,1,4.0
5,6,divided,1,5.0
6,7,divided,1,5.0
7,8,divided,1,5.0
8,9,divided,1,2.0
9,10,divided,1,6.0


In [4]:
import seaborn as sns
import pandas as pd
# Preprocess the data
from sklearn.preprocessing import OneHotEncoder

# Load the titanic dataset
attention_data = sns.load_dataset('attention')

print("Attention Data")

print(attention_data.columns) # Display column names
display(attention_data[['subject', 'attention', 'solutions', 'score']]) # Display subset of columns

# Preprocessing steps
td = attention_data.copy()  # Make a copy to avoid modifying the original data
td.dropna(inplace=True) # drop rows with at least one missing value
td['subject'] = td['subject'].astype('category').cat.codes # Encoding subject as categorical
td['attention'] = td['attention'].astype('category').cat.codes # Encoding attention as categorical
td['solutions'] = td['solutions'].astype('category').cat.codes # Encoding solutions as categorical

print(td.columns)
display(td)

Attention Data
Index(['Unnamed: 0', 'subject', 'attention', 'solutions', 'score'], dtype='object')


Unnamed: 0,subject,attention,solutions,score
0,1,divided,1,2.0
1,2,divided,1,3.0
2,3,divided,1,3.0
3,4,divided,1,5.0
4,5,divided,1,4.0
5,6,divided,1,5.0
6,7,divided,1,5.0
7,8,divided,1,5.0
8,9,divided,1,2.0
9,10,divided,1,6.0


Index(['Unnamed: 0', 'subject', 'attention', 'solutions', 'score'], dtype='object')


Unnamed: 0.1,Unnamed: 0,subject,attention,solutions,score
0,0,0,0,0,2.0
1,1,1,0,0,3.0
2,2,2,0,0,3.0
3,3,3,0,0,5.0
4,4,4,0,0,4.0
5,5,5,0,0,5.0
6,6,6,0,0,5.0
7,7,7,0,0,5.0
8,8,8,0,0,2.0
9,9,9,0,0,6.0


In [3]:
# Select numerical columns
numeric_columns = attention_data.select_dtypes(include=['number'])

print("Maximums score")
print(numeric_columns[attention_data['score'] == 1].max())
print()

print("Minimums score")
print(numeric_columns[attention_data['score'] == 1].min())


Maximums score
Unnamed: 0   NaN
subject      NaN
solutions    NaN
score        NaN
dtype: float64

Minimums score
Unnamed: 0   NaN
subject      NaN
solutions    NaN
score        NaN
dtype: float64


In [3]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the attention dataset using Seaborn
attention_data = sns.load_dataset('attention')

# Display the columns and a sample of the dataset
print("Attention Data Columns:")
print(attention_data.columns)
print("\nSample of the Attention Data:")
print(attention_data[['subject', 'attention', 'solutions', 'score']].head())

# Split the data into features (X) and target (y)
X = attention_data[['subject', 'attention', 'solutions']]
y = attention_data['score']

# Define a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['attention'])
    ],
    remainder='passthrough'
)

# Define the linear regression model
regressor = LinearRegression()

# Create a pipeline with preprocessing and linear regression
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', regressor)
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# Define a new observation (similar to a passenger)
new_observation = pd.DataFrame({
    'subject': [6],  # Assuming the next subject ID is 6
    'attention': ['divided'],
    'solutions': [1]
})

# Predict the score for the new observation
score_prediction = pipeline.predict(new_observation)

# Print the predicted score
print('\nPredicted Score:')
print(score_prediction[0])

Attention Data Columns:
Index(['Unnamed: 0', 'subject', 'attention', 'solutions', 'score'], dtype='object')

Sample of the Attention Data:
   subject attention  solutions  score
0        1   divided          1    2.0
1        2   divided          1    3.0
2        3   divided          1    3.0
3        4   divided          1    5.0
4        5   divided          1    4.0

Predicted Score:
4.682855275171013


In [6]:
# Import the required libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Define the AttentionRegression global variable
attention_regression = None

# Define the AttentionRegression class
class AttentionRegression:
    def __init__(self):
        self.dt = None
        self.logreg = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.encoder = None

    def initAttention(self):
        # Load the attention dataset
        attention_data = pd.read_csv('attention.csv')
        
        # Preprocess the data
        X = attention_data.drop('score', axis=1)
        y = attention_data['score']
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Initialize the encoder
        self.encoder = OneHotEncoder(handle_unknown='ignore')
        self.X_train = self.encoder.fit_transform(self.X_train)
        self.X_test = self.encoder.transform(self.X_test)

        self.dt = DecisionTreeClassifier()
        self.dt.fit(self.X_train, self.y_train)

        self.logreg = LogisticRegression()
        self.logreg.fit(self.X_train, self.y_train)

    def runDecisionTree(self):
        if self.dt is None:
            print("Decision Tree model is not initialized. Please run initTitanic() first.")
            return
        y_pred_dt = self.dt.predict(self.X_test)
        accuracy_dt = accuracy_score(self.y_test, y_pred_dt)
        print('Decision Tree Classifier Accuracy: {:.2%}'.format(accuracy_dt))

    def runLogisticRegression(self):
        if self.logreg is None:
            print("Logistic Regression model is not initialized. Please run initTitanic() first.")
            return
        y_pred_logreg = self.logreg.predict(self.X_test)
        accuracy_logreg = accuracy_score(self.y_test, y_pred_logreg)
        print('Logistic Regression Accuracy: {:.2%}'.format(accuracy_logreg))

def initAttention():
    global attention_regression
    attention_regression = AttentionRegression()
    attention_regression.initAttention()
    attention_regression.runDecisionTree()
    attention_regression.runLogisticRegression()

def predictScore(attention):
    global attention_regression
    attention_df = pd.DataFrame(attention, index=[0])   
    attention_df.drop(['name'], axis=1, inplace=True)
    attention = attention_df.copy()

    # Add missing columns and fill them with default values
    missing_cols = set(attention_regression.X_train.columns) - set(attention.columns)
    for col in missing_cols:
        attention[col] = 0

    # Ensure the order of column in the passenger matches the order in the training data
    attention = attention[attention_regression.X_train.columns]

    # Preprocess the score data
    attention = attention_regression.encoder.transform(attention)

    predict = attention_regression.logreg.predict(attention)
    return predict

# Sample usage
if __name__ == "__main__":
    # Initialize the Attention model
    initAttention()

    # Predict the score
    attention = {
        'subject': ['1'],
        'attention': [0],
        'solutions': ['1'],
    }
    print(predictScore(attention))


FileNotFoundError: [Errno 2] No such file or directory: 'attention.csv'