In [2]:
pip install pandas influxdb-client scikit-learn tensorflow


Collecting influxdb-client
  Downloading influxdb_client-1.47.0-py3-none-any.whl.metadata (64 kB)
Collecting reactivex>=4.0.4 (from influxdb-client)
  Downloading reactivex-4.0.4-py3-none-any.whl.metadata (5.5 kB)
Collecting jax>=0.3.15 (from tensorflow)
  Downloading jax-0.4.35-py3-none-any.whl.metadata (22 kB)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting numpy>=1.21.0 (from pandas)
  Downloading numpy-1.23.5-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.3 kB)
Collecting jaxlib<=0.4.35,>=0.4.34 (from jax>=0.3.15->tensorflow)
  Downloading jaxlib-0.4.35-cp311-cp311-macosx_11_0_arm64.whl.metadata (983 bytes)
Collecting ml-dtypes>=0.4.0 (from jax>=0.3.15->tensorflow)
  Downloading ml_dtypes-0.5.0-cp311-cp311-macosx_10_9_universal2.whl.metadata (21 kB)
INFO: pip is looking at multiple versions of jax to determine which version is compatible with other requirements. This could take a while.
Col

In [5]:
import pandas as pd
import numpy as np
from influxdb_client import InfluxDBClient
from sklearn.ensemble import IsolationForest
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
import datetime

# InfluxDB connection
url = "https://us-east-1-1.aws.cloud2.influxdata.com"
token = "NFdWzEBxBFZzurEU4eBUx2J46YKDD6Wj3RCvY9M0nX4MpdV1fI_Xog6IYUdV0Qtru3pqW8KyejVrqCe0gmQaGA=="
org = "Amrita"
bucket = "gg"

client = InfluxDBClient(url=url, token=token, org=org)
query_api = client.query_api()

# Query data from InfluxDB
def fetch_data():
    query = f'''
        from(bucket: "{bucket}")
        |> range(start: -7d)
        |> filter(fn: (r) => r._measurement == "attendance_records")
        |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
    '''
    tables = query_api.query(query)
    records = []
    for table in tables:
        for record in table.records:
            records.append(record.values)
    return pd.DataFrame(records)

# Fetch and inspect data
df = fetch_data()
print("DataFrame columns:", df.columns)  # Inspect columns to ensure `_time` is present

# Check if '_time' exists in the DataFrame, otherwise handle accordingly
if '_time' in df.columns:
    df['time'] = pd.to_datetime(df['_time'])
else:
    raise ValueError("The '_time' column is missing in the fetched data. Please check the InfluxDB query or data format.")

# Proceed with data processing
df.set_index('time', inplace=True)
df['hour'] = df.index.hour
df['minute'] = df.index.minute
df = df[['roll_number', 'section', 'status', 'hour', 'minute']]

# 1. Isolation Forest for Anomaly Detection (Basic ML)
def detect_anomalies(data):
    # Prepare data for isolation forest
    isolation_forest = IsolationForest(contamination=0.05, random_state=42)
    features = data[['hour', 'minute']]
    data['anomaly'] = isolation_forest.fit_predict(features)
    data['anomaly'] = data['anomaly'].map({1: 0, -1: 1})  # Map anomalies to 1, normal to 0
    return data

df = detect_anomalies(df)

# 2. LSTM Model for Attendance Prediction
# Prepare data for LSTM
scaler = MinMaxScaler()
df[['hour', 'minute']] = scaler.fit_transform(df[['hour', 'minute']])

# Generate time series data
time_series_data = TimeseriesGenerator(df[['hour', 'minute']].values, df['anomaly'].values, length=10, batch_size=1)

# LSTM Model
model = Sequential([
    LSTM(50, activation='relu', input_shape=(10, 2)),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])
model.compile(optimizer='adam', loss='binary_crossentropy')

# Train the model (adjust epochs as needed)
model.fit(time_series_data, epochs=5)

# 3. Predict on new data
def predict_attendance_pattern(data):
    # Scale the data
    scaled_data = scaler.transform(data[['hour', 'minute']])
    time_series_data = TimeseriesGenerator(scaled_data, np.zeros(len(data)), length=10, batch_size=1)
    
    # Generate predictions
    predictions = model.predict(time_series_data)
    
    # Add a column to the original data for predictions, with NaN padding at the beginning
    predicted_anomalies = np.concatenate([np.full(10, np.nan), (predictions > 0.5).astype(int).flatten()])
    data['predicted_anomaly'] = predicted_anomalies
    
    return data

# Generate predictions
df = predict_attendance_pattern(df)

# Print results
print("Processed Data with Anomaly Detection and Predictions:")
print(df.head())

# Cleanup
client.close()


DataFrame columns: Index(['result', 'table', '_time', '_start', '_stop', '_measurement', 'hour',
       'minute', 'roll_number', 'section', 'status'],
      dtype='object')
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Processed Data with Anomaly Detection and Predictions:
                                 roll_number section          status  \
time                                                                   
2024-11-06 14:08:07.058893+00:00          11       B   exit_detected   
2024-11-06 14:58:14.202397+00:00          12       F  entry_detected   
2024-11-06 14:58:35.298150+00:00          12       F   exit_detected   
2024-11-06 15:03:30.964617+00:00          12       F  entry_detected   
2024-11-06 15:05:36.877041+00:00          12       F  entry_detected   

                                      hour    minute  anomaly  \
time                                                            
2024-11-06 14:08:07.058893+00:00  0.692308  0.090909        0   
2024-11-06 14:58:14.20

In [6]:
import pandas as pd
import numpy as np
from influxdb_client import InfluxDBClient
from sklearn.ensemble import IsolationForest
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler

# InfluxDB connection details
url = "https://us-east-1-1.aws.cloud2.influxdata.com"
token = "NFdWzEBxBFZzurEU4eBUx2J46YKDD6Wj3RCvY9M0nX4MpdV1fI_Xog6IYUdV0Qtru3pqW8KyejVrqCe0gmQaGA=="
org = "Amrita"
bucket = "gg"

client = InfluxDBClient(url=url, token=token, org=org)
query_api = client.query_api()

# Query data from InfluxDB
def fetch_data():
    query = f'''
        from(bucket: "{bucket}")
        |> range(start: -7d)
        |> filter(fn: (r) => r._measurement == "attendance_records")
        |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
    '''
    tables = query_api.query(query)
    records = []
    for table in tables:
        for record in table.records:
            records.append(record.values)
    return pd.DataFrame(records)

# Fetch data and process
df = fetch_data()
df['time'] = pd.to_datetime(df['_time'])
df.set_index('time', inplace=True)
df['hour'] = df.index.hour
df['minute'] = df.index.minute
df = df[['roll_number', 'section', 'status', 'hour', 'minute']]

# 1. Anomaly Detection
def detect_anomalies(data):
    isolation_forest = IsolationForest(contamination=0.05, random_state=42)
    features = data[['hour', 'minute']]
    data['anomaly'] = isolation_forest.fit_predict(features)
    data['anomaly'] = data['anomaly'].map({1: 0, -1: 1})  # Map anomalies to 1, normal to 0
    return data

df = detect_anomalies(df)

# Save anomalies to CSV
df[['roll_number', 'section', 'status', 'hour', 'minute', 'anomaly']].to_csv('attendance_anomalies.csv', index=True)

# 2. LSTM Model for Attendance Prediction
scaler = MinMaxScaler()
df[['hour', 'minute']] = scaler.fit_transform(df[['hour', 'minute']])
time_series_data = TimeseriesGenerator(df[['hour', 'minute']].values, df['anomaly'].values, length=10, batch_size=1)

# Define LSTM model
model = Sequential([
    LSTM(50, activation='relu', input_shape=(10, 2)),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(time_series_data, epochs=5)

# Generate and save predictions
def predict_attendance(data):
    scaled_data = scaler.transform(data[['hour', 'minute']])
    time_series_data = TimeseriesGenerator(scaled_data, np.zeros(len(data)), length=10, batch_size=1)
    predictions = model.predict(time_series_data)
    predicted_anomalies = np.concatenate([np.full(10, np.nan), (predictions > 0.5).astype(int).flatten()])
    data['predicted_anomaly'] = predicted_anomalies
    return data

df = predict_attendance(df)
df[['roll_number', 'section', 'status', 'hour', 'minute', 'predicted_anomaly']].to_csv('attendance_predictions.csv', index=True)

# Close the InfluxDB client
client.close()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
