In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
all_logs=pd.read_csv('all_logs_cleaned.csv')

In [4]:
all_logs

Unnamed: 0,entity_id,timestamp,location,confidence,event_type
0,E100000,2025-08-29 07:41:18,Hostel,0.9,card_swipes
1,E100000,2025-09-02 13:58:39,Library,0.9,card_swipes
2,E100000,2025-09-04 02:31:00,LAB_101,0.7,cctv_frames
3,E100000,2025-09-05 06:46:02,Auditorium,0.9,card_swipes
4,E100000,2025-09-10 22:12:00,Seminar Room,1.0,lab_booking
...,...,...,...,...,...
41010,E106999,2025-09-08 02:40:53,Seminar Room,0.5,text_notes
41011,E106999,2025-09-10 05:53:24,Cafeteria,0.9,card_swipes
41012,E106999,2025-09-12 22:15:09,Library,0.9,card_swipes
41013,E106999,2025-09-17 01:58:00,Main building,0.9,wifi_logs


In [5]:
# Change confidence for all text_notes entries
all_logs.loc[all_logs['event_type'] == 'text_notes', 'confidence'] = 0.8

# Optional: verify the change
print(all_logs[all_logs['event_type'] == 'text_notes'].head())


   entity_id            timestamp      location  confidence  event_type
17   E100002  2025-09-02 22:35:06           Lab         0.8  text_notes
19   E100002  2025-09-20 15:28:25       Library         0.8  text_notes
50   E100007  2025-09-15 15:42:59  Seminar Room         0.8  text_notes
58   E100008  2025-09-02 02:53:56           Lab         0.8  text_notes
60   E100008  2025-09-14 19:34:32        Hostel         0.8  text_notes


In [6]:
entity_id = 'E100694'  # change to the entity you want
entity_data = all_logs[all_logs['entity_id'] == entity_id].copy()


In [7]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
entity_data['location_encoded'] = le.fit_transform(entity_data['location'])


In [8]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
entity_data['location_encoded'] = le.fit_transform(entity_data['location'])


In [9]:
# If not already done
entity_data['timestamp'] = pd.to_datetime(entity_data['timestamp'])


In [10]:
entity_data['hour'] = entity_data['timestamp'].dt.hour
entity_data['day_of_week'] = entity_data['timestamp'].dt.dayofweek
entity_data['is_weekend'] = entity_data['day_of_week'].isin([5,6]).astype(int)


In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Drop rows with NaN location for training
train_data = entity_data.dropna(subset=['location_encoded'])

X = train_data[['hour','day_of_week','is_weekend']]  # Add more features if available
y = train_data['location_encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

# Predict location for a new timestamp
new_timestamp = pd.Timestamp('2025-09-16 07:00:00')
X_new = pd.DataFrame({
    'hour': [new_timestamp.hour],
    'day_of_week': [new_timestamp.dayofweek],
    'is_weekend': [int(new_timestamp.dayofweek in [5,6])]
})
pred_location = le.inverse_transform(model.predict(X_new))
print(pred_location)


['Library']


In [16]:
import google.generativeai as genai
import pandas as pd

# -------------------------------
# Configure Gemini API
# -------------------------------
genai.configure(api_key="AIzaSyA6nFY3_sRhMPkoz5MuX_RlbZNSJHZvaxE")  # Replace with your actual Gemini key
model = genai.GenerativeModel("gemini-2.5-pro")

# -------------------------------
# Assume you already have:
# new_timestamp = pd.Timestamp('2025-09-16 07:00:00')
# pred_location = 'Library'  # Example output from RF model
# -------------------------------

# Create a reasoning prompt for Gemini
prompt = f"""
I trained a Random Forest Classifier to predict the most likely location of an entity at a given timestamp.

Here’s what I did step-by-step:

I started with a dataset (entity_data) containing timestamps and known locations (encoded as location_encoded).

I dropped all rows where the location was missing.

I extracted temporal features from each timestamp:

hour: the hour of the day

day_of_week: which day of the week it is (0 = Monday, 6 = Sunday)

is_weekend: whether the day is a weekend (1) or not (0)

These features (hour, day_of_week, is_weekend) were used as input (X), and the encoded location was used as the target (y).

I split the data into training and test sets (80/20 split).

I trained a RandomForestClassifier on the training data.

After training, I used the model to predict the location for a new timestamp (2025-09-16 07:00:00).

Finally, I decoded the predicted label using the LabelEncoder (le.inverse_transform) to get the original location name.

Please generate a descriptive paragraph or explanation summarizing this workflow, as if I were describing how I built and used a machine learning model to predict an entity’s next location based on temporal features

Timestamp: {new_timestamp}
Predicted Location: {pred_location}

Output: pls give a explaination why the predicted Location was this. the explanation should be non-technical but logical and simple.
"""

# Generate reasoning
response = model.generate_content(prompt)

# Print result
print("Predicted Location:", pred_location)
print("Gemini Explanation:", response.text.strip())


Predicted Location: ['Library']
Gemini Explanation: Of course. Here is a simple, non-technical explanation for why the model made that prediction.

***

The model predicted the **Library** because it learned from the entity's past habits. Think of the model as a very observant detective that has studied the entity's entire schedule.

After looking at all the historical data, the detective noticed a very strong pattern: on weekday mornings, especially on Tuesdays around 7:00 AM, the entity is almost always at the Library.

So, when you asked for a prediction for **Tuesday, September 16th, at 7:00 AM**, the model simply followed the most powerful habit it had learned. It recognized the time and day as "a typical weekday morning" and, based on overwhelming past evidence, concluded the most probable location would be the Library.
