<a href="https://colab.research.google.com/github/indhu68/Intro_to_DL_Project/blob/main/Intro_to_DL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
!pip install haversine



In [28]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [29]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from haversine import haversine

# Load data
df = pd.read_csv('/content/drive/My Drive/Kasungu_Telemetry_Pts_Oct23.csv', parse_dates=['Time.Stamp'])

# Assuming 'Tag' and 'Index' are columns in your CSV to create a unique identifier for each elephant
df['ElephantID'] = df['Tag'].astype(str) + '_' + df['Index'].astype(str)


In [30]:
imputer = SimpleImputer(strategy='mean')
df[['Latitude', 'Longitude']] = imputer.fit_transform(df[['Latitude', 'Longitude']])


In [31]:
def calculate_haversine(lat1, lon1, lat2, lon2):
    """Calculate the haversine distance between two points on the earth (specified in decimal degrees)."""
    # convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles
    return c * r

# Adding time-related features
df['hour_of_day'] = df['Time.Stamp'].dt.hour
df['day_of_week'] = df['Time.Stamp'].dt.dayofweek

# Ensure data is sorted to correctly calculate distances
df.sort_values(by=['ElephantID', 'Time.Stamp'], inplace=True)

# Calculate shifted latitude and longitude for distance calculation
df['shifted_latitude'] = df.groupby('ElephantID')['Latitude'].shift(1)
df['shifted_longitude'] = df.groupby('ElephantID')['Longitude'].shift(1)

# Calculate haversine distance; dropping NaN values resulting from the shift operation
df.dropna(subset=['shifted_latitude', 'shifted_longitude'], inplace=True)
df['distance'] = df.apply(lambda x: calculate_haversine(x['Latitude'], x['Longitude'], x['shifted_latitude'], x['shifted_longitude']), axis=1)


In [32]:
scaler = StandardScaler()
features = ['Latitude', 'Longitude', 'distance', 'hour_of_day', 'day_of_week']
df[features] = scaler.fit_transform(df[features])


In [34]:
# Encode ElephantID
encoder = LabelEncoder()
df['ElephantID_encoded'] = encoder.fit_transform(df['ElephantID'])

# Define function to create sequences
def create_sequences(df, sequence_length=10):
    sequences = []
    targets = []
    elephant_ids = []
    for _, group in df.groupby('ElephantID_encoded'):
        if len(group) < sequence_length:
            # Skip groups with less data than our sequence length
            continue
        for i in range(len(group) - sequence_length):
            seq = group[features].iloc[i:i+sequence_length].values
            target = group[['Latitude', 'Longitude']].iloc[i+sequence_length].values
            elephant_id = group['ElephantID_encoded'].iloc[i+sequence_length]
            sequences.append(seq)
            targets.append(target)
            elephant_ids.append(elephant_id)
    return np.array(sequences), np.array(targets), np.array(elephant_ids)

sequence_length = 10
X, y, elephant_ids = create_sequences(df, sequence_length)
df

Unnamed: 0,Index,Tag,Type,Latitude,Longitude,Time.Stamp,DOP,Speed,Battery,Movement,...,Temperatur,Accelerome,SW.Ver.,ElephantID,hour_of_day,day_of_week,shifted_latitude,shifted_longitude,distance,ElephantID_encoded
321739,10009,5739,IR-SAT Tag,-0.489047,1.202999,2023-08-16 00:40:00,10 meters,0 km/h,3.65 Volt,Y,...,24,"[1168,-940,15668]",66,5739_10009,-1.659635,-0.490606,-13.048713,33.156097,0.069321,0
154543,10062,5739,IR-SAT Tag,-1.412388,0.290104,2022-12-13 11:03:00,10 meters,2 km/h,3.64 Volt,Y,...,31,"[-2959,150,16065]",66,5739_10062,-0.069887,-0.984417,-13.046560,33.159227,-0.420147,1
281053,10078,5739,IR-SAT Tag,-0.912758,0.665118,2023-06-15 13:17:00,10 meters,4 km/h,3.64 Volt,Y,...,38,"[-2146,1820,15831]",66,5739_10078,0.219158,0.003205,-13.046658,33.162120,-0.351256,2
177675,10096,5739,IR-SAT Tag,-0.930185,-0.274405,2023-01-15 13:46:00,<10 meters,0 km/h,3.64 Volt,Y,...,36,"[-566,924,15927]",66,5739_10096,0.219158,1.484637,-13.045980,33.163072,-0.894815,3
89069,10131,5739,IR-SAT Tag,-1.241329,-0.010300,2022-09-23 02:40:00,<10 meters,0 km/h,3.63 Volt,Y,...,23,"[-13367,-2951,8061]",66,5739_10131,-1.370590,0.497015,-13.041287,33.170002,-0.640038,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177054,9526,5782,IR-SAT Tag,-2.072543,-0.618208,2022-12-12 15:56:00,10 meters,1 km/h,3.65 Volt,Y,...,36,"[-4574,3888,14820]",66,5782_9526,0.508203,-1.478228,-13.261627,33.113900,-0.654764,25923
382701,9676,5782,IR-SAT Tag,-0.491552,0.132597,2023-08-15 12:37:00,10 meters,2 km/h,3.66 Volt,Y,...,43,"[306,2382,14913]",66,5782_9676,0.074635,-0.984417,-12.863330,33.033873,0.146622,25924
280644,9705,5782,IR-SAT Tag,0.309878,-1.306176,2023-03-12 23:33:00,10 meters,0 km/h,3.66 Volt,Y,...,29,"[-15952,1507,565]",66,5782_9705,1.664383,1.484637,-13.272192,33.130590,1.227027,25925
321640,9919,5782,IR-SAT Tag,-0.026736,-0.028846,2023-06-15 07:40:00,<10 meters,0 km/h,3.66 Volt,Y,...,35,"[-3411,2673,15156]",66,5782_9919,-0.647977,0.003205,-13.203550,33.112588,0.620930,25926
