In [2]:
import pandas as pd
import numpy as np


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
data = pd.read_csv('./falldata.csv')
data.head()

Unnamed: 0,Time,a.x,a.y,a.z,gForce,fall state
0,18:24:06,2.67,-0.26,11.29,1.18,0.0
1,18:24:16,2.58,-0.19,11.33,1.18,0.0
2,18:24:26,-5.72,0.47,8.69,1.06,0.0
3,18:24:37,4.8,2.37,10.47,1.2,0.0
4,18:24:47,-9.43,-0.18,1.81,0.98,0.0


In [4]:
data.columns

Index(['Time', 'a.x', 'a.y', 'a.z', 'gForce ', 'fall state'], dtype='object')

In [5]:
data.shape


(4427, 6)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4427 entries, 0 to 4426
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Time        4415 non-null   object 
 1   a.x         4410 non-null   float64
 2   a.y         4410 non-null   float64
 3   a.z         4410 non-null   float64
 4   gForce      4410 non-null   float64
 5   fall state  4410 non-null   float64
dtypes: float64(5), object(1)
memory usage: 207.6+ KB


In [7]:
data.isna().sum()

Time          12
a.x           17
a.y           17
a.z           17
gForce        17
fall state    17
dtype: int64

In [8]:
data.describe()

Unnamed: 0,a.x,a.y,a.z,gForce,fall state
count,4410.0,4410.0,4410.0,4410.0,4410.0
mean,-4.120336,0.218517,5.360218,1.052009,0.04195
std,5.379345,3.177906,5.11131,0.213424,0.200498
min,-33.92,-18.16,-22.33,0.0,0.0
25%,-9.49,-1.44,1.19,0.99,0.0
50%,-2.235,0.08,5.4,1.03,0.0
75%,-0.31,1.16,9.9,1.05,0.0
max,28.06,19.29,26.1,4.33,1.0


In [9]:
data.fillna(0, inplace=True)

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4427 entries, 0 to 4426
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Time        4427 non-null   object 
 1   a.x         4427 non-null   float64
 2   a.y         4427 non-null   float64
 3   a.z         4427 non-null   float64
 4   gForce      4427 non-null   float64
 5   fall state  4427 non-null   float64
dtypes: float64(5), object(1)
memory usage: 207.6+ KB


In [11]:
for c in data.select_dtypes(include=['object']):
    data[c].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[c].fillna(0, inplace=True)


In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4427 entries, 0 to 4426
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Time        4427 non-null   object 
 1   a.x         4427 non-null   float64
 2   a.y         4427 non-null   float64
 3   a.z         4427 non-null   float64
 4   gForce      4427 non-null   float64
 5   fall state  4427 non-null   float64
dtypes: float64(5), object(1)
memory usage: 207.6+ KB


In [14]:
data.to_csv('falldata.csv')

In [17]:
data=data.drop('Time',axis=1)

In [18]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4427 entries, 0 to 4426
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   a.x         4427 non-null   float64
 1   a.y         4427 non-null   float64
 2   a.z         4427 non-null   float64
 3   gForce      4427 non-null   float64
 4   fall state  4427 non-null   float64
dtypes: float64(5)
memory usage: 173.1 KB


In [20]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
X = data.drop('fall state', axis=1)  # Features
y = data['fall state']  # Target

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
X_train

Unnamed: 0,a.x,a.y,a.z,gForce
1264,-9.62,0.79,1.40,0.99
1907,-2.11,2.98,10.57,1.14
436,0.15,-3.08,9.63,1.03
3963,-9.56,0.38,1.48,0.99
2698,-9.60,-0.07,-0.86,0.98
...,...,...,...,...
3444,0.00,1.08,10.13,1.04
466,0.15,-3.13,9.65,1.03
3092,-4.29,0.06,8.90,1.01
3772,-1.02,-0.74,9.97,1.02


In [23]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

In [24]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, y_pred_lr)
print("Mean Squared Error:", mse)
r2 = r2_score(y_test, y_pred_lr)
print("R-squared:", r2)

Mean Squared Error: 0.02387394560684262
R-squared: 0.4034014005160592


In [25]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy*100

98.30699774266365

In [26]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn)*100)

KNN Accuracy: 98.87133182844244


In [27]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)
print("Naive Bayes Accuracy:", accuracy_score(y_test, y_pred_gnb)*100)

Naive Bayes Accuracy: 97.40406320541761


In [28]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt)*100)

Decision Tree Accuracy: 99.09706546275395


In [29]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf)*100)

Random Forest Accuracy: 98.9841986455982
