<a href="https://colab.research.google.com/github/indhu68/Intro_to_DL_Project/blob/main/Intro_to_DL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install haversine



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from haversine import haversine

# Load data
df = pd.read_csv('/content/drive/My Drive/Kasungu_Telemetry_Pts_Oct23.csv', parse_dates=['Time.Stamp'])

# Assuming 'Tag' and 'Index' are columns in your CSV to create a unique identifier for each elephant
df['ElephantID'] = df['Tag'].astype(str) + '_' + df['Index'].astype(str)


In [4]:
df

Unnamed: 0,Index,Tag,Type,Latitude,Longitude,Time.Stamp,DOP,Speed,Battery,Movement,...,RSSI,Coverage,Retries,SW_Ver_,Time_Stamp,Log_Interv,Temperatur,Accelerome,SW.Ver.,ElephantID
0,10000,5748,IR-SAT Tag,-13.027828,33.133265,2022-07-14 11:08:00,10 meters,3 km/h,3.68 Volt,Y,...,5,N,4,66,7/14/2022,Every 30 minutes,41,"[1033,5505,15905]",66,5748_10000
1,10001,5744,IR-SAT Tag,-13.224110,33.157962,2022-07-14 11:10:00,<10 meters,0 km/h,3.65 Volt,Y,...,4,Y,0,66,7/14/2022,Every 30 minutes,41,"[1222,-960,15876]",66,5744_10001
2,10002,5740,IR-SAT Tag,-13.050495,33.152538,2022-07-14 11:23:00,10 meters,0 km/h,3.68 Volt,Y,...,5,N,2,66,7/14/2022,Every 30 minutes,39,"[-1007,203,15311]",66,5740_10002
3,10005,5746,IR-SAT Tag,-13.061195,33.182005,2022-07-14 11:24:00,10 meters,2 km/h,3.66 Volt,Y,...,3,N,2,66,7/14/2022,Every 30 minutes,41,"[2940,984,14900]",66,5746_10005
4,10006,5745,IR-SAT Tag,-13.007968,33.104750,2022-07-14 11:24:00,10 meters,0 km/h,3.67 Volt,Y,...,5,N,2,66,7/14/2022,Every 30 minutes,40,"[721,-1718,15198]",66,5745_10006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383055,9995,5751,IR-SAT Tag,-12.732593,33.183252,2023-08-16 00:04:00,10 meters,0 km/h,3.66 Volt,Y,...,2,N,3,66,8/16/2023,Every 1 hour,25,"[-441,284,15724]",66,5751_9995
383056,9996,5750,IR-SAT Tag,-12.730062,33.184862,2023-08-16 00:06:00,10 meters,2 km/h,3.64 Volt,Y,...,1,Y,0,66,8/16/2023,Every 1 hour,26,"[-1446,818,15792]",66,5750_9996
383057,9997,5757,IR-SAT Tag,-12.719173,33.082990,2023-08-16 00:12:00,10 meters,0 km/h,3.68 Volt,Y,...,4,Y,0,66,8/16/2023,Every 1 hour,23,"[-14394,1689,6386]",66,5757_9997
383058,9998,5743,IR-SAT Tag,-13.035063,33.221140,2023-08-16 00:17:00,10 meters,0 km/h,3.69 Volt,Y,...,5,Y,0,66,8/16/2023,Every 1 hour,22,"[2096,3363,15104]",66,5743_9998


In [5]:
df_new = df.drop(columns=[col for col in ['Type'] + list(df.columns)[6:20]])
df_new


Unnamed: 0,Index,Tag,Latitude,Longitude,Time.Stamp,ElephantID
0,10000,5748,-13.027828,33.133265,2022-07-14 11:08:00,5748_10000
1,10001,5744,-13.224110,33.157962,2022-07-14 11:10:00,5744_10001
2,10002,5740,-13.050495,33.152538,2022-07-14 11:23:00,5740_10002
3,10005,5746,-13.061195,33.182005,2022-07-14 11:24:00,5746_10005
4,10006,5745,-13.007968,33.104750,2022-07-14 11:24:00,5745_10006
...,...,...,...,...,...,...
383055,9995,5751,-12.732593,33.183252,2023-08-16 00:04:00,5751_9995
383056,9996,5750,-12.730062,33.184862,2023-08-16 00:06:00,5750_9996
383057,9997,5757,-12.719173,33.082990,2023-08-16 00:12:00,5757_9997
383058,9998,5743,-13.035063,33.221140,2023-08-16 00:17:00,5743_9998


In [6]:
imputer = SimpleImputer(strategy='mean')
df_new[['Latitude', 'Longitude']] = imputer.fit_transform(df_new[['Latitude', 'Longitude']])


In [7]:
def calculate_haversine(lat1, lon1, lat2, lon2):
    """Calculate the haversine distance between two points on the earth (specified in decimal degrees)."""
    # convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles
    return c * r

# Adding time-related features
df_new['hour_of_day'] = df_new['Time.Stamp'].dt.hour
df_new['day_of_week'] = df_new['Time.Stamp'].dt.dayofweek

# Ensure data is sorted to correctly calculate distances
df_new.sort_values(by=['ElephantID', 'Time.Stamp'], inplace=True)

# Calculate shifted latitude and longitude for distance calculation
df_new['shifted_latitude'] = df_new.groupby('ElephantID')['Latitude'].shift(1)
df_new['shifted_longitude'] = df_new.groupby('ElephantID')['Longitude'].shift(1)

# Calculate haversine distance; dropping NaN values resulting from the shift operation
df_new.dropna(subset=['shifted_latitude', 'shifted_longitude'], inplace=True)
df_new['distance'] = df_new.apply(lambda x: calculate_haversine(x['Latitude'], x['Longitude'], x['shifted_latitude'], x['shifted_longitude']), axis=1)


In [8]:
scaler = StandardScaler()
features = ['Latitude', 'Longitude', 'distance', 'hour_of_day', 'day_of_week']
df_new[features] = scaler.fit_transform(df_new[features])


In [9]:
# Encode ElephantID
encoder = LabelEncoder()
df_new['ElephantID_encoded'] = encoder.fit_transform(df_new['ElephantID'])

In [10]:

# Define function to create sequences
def create_sequences(df_new, sequence_length=10):
    sequences = []
    targets = []
    elephant_ids = []
    for _, group in df_new.groupby('ElephantID_encoded'):
        if len(group) < sequence_length:
            # Skip groups with less data than our sequence length
            continue
        for i in range(len(group) - sequence_length):
            seq = group[features].iloc[i:i+sequence_length].values
            target = group[['Latitude', 'Longitude']].iloc[i+sequence_length].values
            elephant_id = group['ElephantID_encoded'].iloc[i+sequence_length]
            sequences.append(seq)
            targets.append(target)
            elephant_ids.append(elephant_id)
    return np.array(sequences), np.array(targets), np.array(elephant_ids)

sequence_length = 10
X, y, elephant_ids = create_sequences(df_new, sequence_length)
df_new

Unnamed: 0,Index,Tag,Latitude,Longitude,Time.Stamp,ElephantID,hour_of_day,day_of_week,shifted_latitude,shifted_longitude,distance,ElephantID_encoded
321739,10009,5739,-0.489047,1.202999,2023-08-16 00:40:00,5739_10009,-1.659635,-0.490606,-13.048713,33.156097,0.069321,0
154543,10062,5739,-1.412388,0.290104,2022-12-13 11:03:00,5739_10062,-0.069887,-0.984417,-13.046560,33.159227,-0.420147,1
281053,10078,5739,-0.912758,0.665118,2023-06-15 13:17:00,5739_10078,0.219158,0.003205,-13.046658,33.162120,-0.351256,2
177675,10096,5739,-0.930185,-0.274405,2023-01-15 13:46:00,5739_10096,0.219158,1.484637,-13.045980,33.163072,-0.894815,3
89069,10131,5739,-1.241329,-0.010300,2022-09-23 02:40:00,5739_10131,-1.370590,0.497015,-13.041287,33.170002,-0.640038,4
...,...,...,...,...,...,...,...,...,...,...,...,...
177054,9526,5782,-2.072543,-0.618208,2022-12-12 15:56:00,5782_9526,0.508203,-1.478228,-13.261627,33.113900,-0.654764,25923
382701,9676,5782,-0.491552,0.132597,2023-08-15 12:37:00,5782_9676,0.074635,-0.984417,-12.863330,33.033873,0.146622,25924
280644,9705,5782,0.309878,-1.306176,2023-03-12 23:33:00,5782_9705,1.664383,1.484637,-13.272192,33.130590,1.227027,25925
321640,9919,5782,-0.026736,-0.028846,2023-06-15 07:40:00,5782_9919,-0.647977,0.003205,-13.203550,33.112588,0.620930,25926
