In [None]:
#Step 1: Understand Data

import pandas as pd

# Load the raw dataset; the path is resolved relative to the notebook's
# working directory.
data = pd.read_csv("Human Activity Data.csv")

# Print basic properties of the loaded frame
print("Head:\n", data.head())
print("\nShape:", data.shape)
print("\nColumns:", data.columns)
print("\nData Types:\n", data.dtypes)
# DataFrame.info() writes its summary directly to stdout and returns None.
# Wrapping it in print() therefore emitted the summary *before* the "Info:"
# header and then printed a stray "None"; print the header first and call
# info() on its own line instead.
print("\nInfo:")
data.info()
# Number of rows per label in the 'Activity' column
print("\nValue Counts:\n", data['Activity'].value_counts())


In [None]:
#Step 2: Build a small dataset
# Keep only rows whose 'Activity' label is one of the selected classes, then
# take at most the first 500 rows of each class (original row order is kept).
# NOTE(review): isin() matches labels exactly (case-sensitive) — confirm these
# spellings match the values shown by value_counts() for the 'Activity' column.
selected_activities = ['Laying', 'Sitting', 'Walking']
is_selected = data['Activity'].isin(selected_activities)
small_data = data[is_selected].groupby('Activity').head(500)

# Persist the reduced dataset as CSV, without writing the index column
small_data.to_csv("Small_Human_Activity_Data.csv", index=False)


In [None]:
#Step 3: Build GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the small dataset written by the previous step
small_data = pd.read_csv("Small_Human_Activity_Data.csv")

# Split into training and test sets: every column except 'Activity' is a
# feature, 'Activity' is the target; 20% of the rows are held out for testing.
# random_state pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    small_data.drop('Activity', axis=1), small_data['Activity'], test_size=0.2, random_state=42
)

# Create GradientBoosting Classifier with default hyper-parameters.
# The estimator is seeded (same seed as the split) so that the fitted model,
# and therefore the scores printed below, are reproducible across
# Restart & Run All.
gb_classifier = GradientBoostingClassifier(random_state=42)
gb_classifier.fit(X_train, y_train)

# Predict on the held-out test set and print scores
y_pred = gb_classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))



In [None]:
#Step 4: Find Best no. of trees and Best Learning Rate using Grid Search and Cross Validation
from sklearn.model_selection import GridSearchCV

# Create GridSearchCV model.
# The grid covers the number of boosting stages (trees) and the learning rate:
# 4 x 2 = 8 candidates, each evaluated with 5-fold cross-validation.
param_grid = {'n_estimators': [50, 100, 200, 400], 'learning_rate': [0.1, 0.01]}
grid_search_gb = GridSearchCV(
    # Seed the estimator so every candidate is fitted reproducibly
    GradientBoostingClassifier(random_state=42),
    param_grid,
    cv=5,
    # The candidate/fold fits are independent of each other, so run them on
    # all available cores instead of serially.
    n_jobs=-1,
)
grid_search_gb.fit(X_train, y_train)

# Predict with the best candidate (refitted on the full training set by
# GridSearchCV's default refit=True) and print scores
y_pred_grid_gb = grid_search_gb.best_estimator_.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_grid_gb))
print("Classification Report:\n", classification_report(y_test, y_pred_grid_gb))
print("Best Parameters:", grid_search_gb.best_params_)



In [None]:

#Step 5: Build AdaBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Create AdaBoost Classifier with a decision tree as the base learner.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so `base_estimator=` raises TypeError on current releases,
# while the first positional argument works on both old and new versions.
# Both estimators are seeded so the search is reproducible.
# NOTE(review): the tree has no max_depth limit. A fully grown tree will
# usually fit the training folds perfectly, in which case boosting stops early
# and n_estimators / learning_rate have little effect — consider a shallow
# tree (e.g. max_depth=1) if real boosting is intended.
ada_classifier = AdaBoostClassifier(
    DecisionTreeClassifier(random_state=42),
    random_state=42,
)

# 3 x 3 = 9 candidates, each evaluated with 5-fold cross-validation
param_grid_ada = {'n_estimators': [150, 200, 400], 'learning_rate': [1.0, 0.01, 0.001]}
grid_search_ada = GridSearchCV(ada_classifier, param_grid_ada, cv=5)
grid_search_ada.fit(X_train, y_train)

# Predict with the best candidate on the held-out test set and print scores
y_pred_ada = grid_search_ada.best_estimator_.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_ada))
print("Classification Report:\n", classification_report(y_test, y_pred_ada))
print("Best Parameters:", grid_search_ada.best_params_)


In [None]:
#Step 6: Build LogisticRegressionCV classifier
from sklearn.linear_model import LogisticRegressionCV

# Build and fit a cross-validated logistic regression (4 folds, L2 penalty).
# NOTE(review): Cs=[5] is a one-element list, i.e. a single candidate C=5.0,
# so there is nothing for the cross-validation to choose between; an integer
# Cs=5 would instead search a 5-point logarithmic grid — confirm which one
# was intended.
logreg_cv = LogisticRegressionCV(
    Cs=[5],
    cv=4,
    penalty='l2',
).fit(X_train, y_train)  # fit() returns the estimator itself

# Score the model on the held-out test set
y_pred_logreg = logreg_cv.predict(X_test)
logreg_report = classification_report(y_test, y_pred_logreg)
print("Classification Report:\n", logreg_report)


In [None]:
#Step 7: Build VotingClassifier

from sklearn.ensemble import VotingClassifier

# Members of the ensemble as (name, estimator) pairs: the baseline gradient
# boosting model and the cross-validated logistic regression built earlier.
ensemble_members = [
    ('gradient_boost', gb_classifier),
    ('logistic_regression', logreg_cv),
]

# Soft voting combines the members' predicted class probabilities
voting_classifier = VotingClassifier(estimators=ensemble_members, voting='soft')

# Train the ensemble on the training split, then label the test split
voting_classifier.fit(X_train, y_train)
y_pred_voting = voting_classifier.predict(X_test)

# Report per-class precision / recall / F1 on the test split
voting_report = classification_report(y_test, y_pred_voting)
print("Classification Report:\n", voting_report)


In [None]:
#Step 8: Interpret your results

# Analyze the results, consider the best parameters, and observe the performance of the different classifiers.
# Experiment with parameter values, try different models, and observe how they impact the results.
# Remember to iterate and refine your models based on the performance metrics and business requirements.





