In [23]:
import pandas as pd

# Load the data
student_data_path = '..\\BBDC\\merged_preprocessed_data_with_info_numericSamples.csv'
student_skeleton_path = './/student_skeleton.csv'

student_data = pd.read_csv(student_data_path)
student_skeleton = pd.read_csv(student_skeleton_path)


In [24]:
import pandas as pd
import numpy as np

def merge_on_closest_timestamp(skeleton, data, timestamp_column='timestamp', session_column='sessionId'):
    # Ensure data is sorted by session and timestamp
    data = data.sort_values(by=[session_column, timestamp_column])
    skeleton = skeleton.sort_values(by=[session_column, timestamp_column])

    # Create an empty DataFrame to store the merged data
    merged_data = pd.DataFrame()

    for session in skeleton[session_column].unique():
        # Filter rows by session
        skeleton_session = skeleton[skeleton[session_column] == session]
        data_session = data[data[session_column] == session]

        if data_session.empty:
            continue

        # Initialize a column to store the closest timestamps from data_session
        skeleton_session['closest_timestamp'] = np.nan

        # Iterate over the skeleton_session DataFrame and find the closest timestamp in data_session
        for index, row in skeleton_session.iterrows():
            skeleton_timestamp = row[timestamp_column]
            
            # Compute the absolute difference between the skeleton timestamp and all data_session timestamps
            abs_diff = data_session[timestamp_column].sub(skeleton_timestamp).abs()
            
            # Find the index of the minimum difference
            closest_idx = abs_diff.idxmin()

            # Store the closest timestamp in the skeleton_session DataFrame
            skeleton_session.at[index, 'closest_timestamp'] = data_session.loc[closest_idx, timestamp_column]

        # Merge the skeleton_session DataFrame with the data_session DataFrame based on the closest timestamp
        merged_session = pd.merge(skeleton_session, data_session, how='left', left_on=['sessionId', 'closest_timestamp'], right_on=[session_column, timestamp_column])

        # Append the merged_session DataFrame to the merged_data DataFrame
        merged_data = pd.concat([merged_data, merged_session])

    # Drop the 'closest_timestamp' column if no longer needed
    merged_data.drop(columns=['closest_timestamp', 'affect_x', 'context_x','notification', 'engagement', 'affect_y', 'context_y'], inplace=True)
    
    return merged_data

merged_data = merge_on_closest_timestamp(student_skeleton, student_data)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skeleton_session['closest_timestamp'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skeleton_session['closest_timestamp'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skeleton_session['closest_timestamp'] = np.nan
A value is trying to be set on a copy of a slice from a DataFram

In [25]:
features_for_prediction = ['x', 'y', 'z', 'ppgValue', 'hr', 'hrIbi', 'hrStatus', 'ibiStatus','gender','age']
X_for_prediction = merged_data[features_for_prediction]

In [12]:
features_for_prediction = ['x', 'y', 'z', 'ppgValue', 'hr', 'hrIbi', 'hrStatus', 'ibiStatus']
X_for_prediction = merged_data[features_for_prediction]

from joblib import dump, load

# Load the model from the file
clf_loaded = load('random_forest_model.joblib')

# Now you can use clf_loaded to make predictions or evaluate it
predictions = clf_loaded.predict(X_for_prediction)

# Attach predictions to the merged_data DataFrame
merged_data['predicted_context'] = predictions[:,0]
merged_data['predicted_affect'] = predictions[:,1]

In [15]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.0.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB 487.6 kB/s eta 0:03:25
   ---------------------------------------- 0.3/99.8 MB 2.2 MB/s eta 0:00:45
   ---------------------------------------- 0.7/99.8 MB 4.9 MB/s eta 0:00:21
   ---------------------------------------- 1.2/99.8 MB 6.5 MB/s eta 0:00:16
    --------------------------------------- 1.8/99.8 MB 7.7 MB/s eta 0:00:13
    --------------------------------------- 2.4/99.8 MB 9.0 MB/s eta 0:00:11
   - -------------------------------------- 3.2/99.8 MB 10.3 MB/s eta 0:00:10
   - -------------------------------------- 4.0/99.8 MB 11.1 MB/s eta 0:00:09
   - -------------------------------------- 4.9/99.8 MB 12.5 MB/s eta 0:00:08
   -- ---------

In [27]:
# Kontext Model
from sklearn.metrics import classification_report
from xgboost import XGBClassifier

xgbc_k = XGBClassifier()
xgbc_k.load_model("model_kontext.json")

predictions_context = xgbc_k.predict(X_for_prediction)

In [28]:
# Affect Model
from sklearn.metrics import classification_report
from xgboost import XGBClassifier

xgbc_k = XGBClassifier()
xgbc_k.load_model("model_affect.json")

predictions_affect = xgbc_k.predict(X_for_prediction)

In [32]:
class_mapping_affect = {
    0: "ANGRY",
    1: "HAPPY",
    2: "RELAXED",
    3: "SAD"
}

predictions_affect_mapped = [class_mapping_affect[x] for x in predictions_affect]

class_mapping_context = {
    0: "CONVERSATION",
    1: "OTHER",
    2: "VIEW_BOTH",
    3: "WALKING"
}

predictions_context_mapped = [class_mapping_context[x] for x in predictions_context]

In [33]:
# For seperate context and affect
student_skeleton_prediction = student_skeleton
student_skeleton_prediction['context'] = predictions_context_mapped
student_skeleton_prediction['affect'] = predictions_affect_mapped

#Change name of file here!
#student_skeleton_prediction.to_csv('prediction_XGBoost.csv', index=False)


In [14]:
#For both
student_skeleton_prediction = student_skeleton
student_skeleton_prediction['context'] = predictions[:,0]
student_skeleton_prediction['affect'] = predictions[:,1]

#Change name of file here!
#student_skeleton_prediction.to_csv('prediction_random_forest.csv', index=False)


In [34]:
def predict(model, test_loader):
    with torch.no_grad():
        logits = []
    
        for inputs in test_loader:
            inputs = inputs.to(device)
            model.eval()
            outputs = model(inputs).cpu() #.to(device) #.cpu()
            logits.append(outputs)
            
    probs = nn.functional.softmax(torch.cat(logits), dim=-1).numpy()
    preds = np.argmax(probs, 1)
    return preds
#probs = torch.softmax(outputs, dim=-1) #Wahrscheinlichkeit, dass eine bestimmte Ausgabe zu einer bestimmten Klasse gehört
#preds = torch.argmax(probs, 1)

In [None]:
#kontext
model_kontext = Model().to(device)
optimizer = torch.optim.AdamW(model_kontext.parameters())
criterion = nn.CrossEntropyLoss()

torch.save(model_kontext.state_dict(), "path_to\\model_kontext_5Schicten64x128_b64_earlyStop14aus25.pth") 

ausgabe_kontext = predict(model_kontext)

In [None]:
#emotions
model_affect = Model().to(device)
optimizer = torch.optim.AdamW(model_affect.parameters())
criterion = nn.CrossEntropyLoss()

torch.save(model_affect.state_dict(), "path_to\\model_affect_5Schichten_64x128_b64_earlyStop16aus25.pth") 

ausgabe_affect = predict(model_affect)