In [None]:
#Write python code to classify the heart disease data using a support vector machine and evaluate the model using various performance evaluation metrics as discussed. 
# Importing the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns


# Load the heart-disease data; 'heart.csv' must be in the working directory.
data = pd.read_csv('heart.csv')

# Separate the features (X) from the label (y).
# NOTE: 'target' is assumed to be the label column; rename it here if the CSV differs.
X = data.drop('target', axis=1)
y = data['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training rows only and
# then applied to the test rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Linear-kernel SVM. probability=True is required for predict_proba; the
# probability estimates are fitted with an internal randomized procedure, so
# random_state is pinned to keep ROC-AUC and the ROC curve identical across runs
# (matching the seeded train/test split above).
svm_model = SVC(kernel='linear', probability=True, random_state=42)
svm_model.fit(X_train, y_train)

# Hard class predictions plus the score for the second class in
# svm_model.classes_ (assumes a binary label where that class is the positive one).
y_pred = svm_model.predict(X_test)
y_prob = svm_model.predict_proba(X_test)[:, 1]

# Evaluation metrics. The confusion matrix is computed once and reused for the
# printed report and the heatmap below.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)

print(f"\nAccuracy: {accuracy}")
print(f"ROC-AUC: {roc_auc}")

# Plot the ROC curve; the dashed diagonal is the chance-level reference line.
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f"ROC curve (area = {roc_auc:.2f})")
plt.plot([0, 1], [0, 1], color="navy", linestyle="--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver Operating Characteristic (ROC) Curve")
plt.legend(loc="lower right")
plt.show()

# Visualize the confusion matrix as an annotated heatmap (integer cell counts).
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()




In [None]:
#Perform the heart disease data statistical analysis and plot the various features individually.

# Importing the necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset; 'heart.csv' must be in the working directory.
data = pd.read_csv('heart.csv')

# Display the first few rows of the dataset
print("First five rows of the dataset:")
print(data.head())

# Basic descriptive statistics for the numeric columns
print("\nStatistical Summary:")
print(data.describe())

# Column dtypes and non-null counts. DataFrame.info() prints its report itself
# and returns None, so it must not be wrapped in print() (that would emit a
# stray "None" line after the report).
print("\nData Types:")
data.info()

# Histograms, one subplot per numeric column
data.hist(bins=15, figsize=(15, 10), color='steelblue', edgecolor='black')
plt.suptitle('Histograms of Continuous Features')
plt.show()

# Box plots to check for outliers. DataFrame.plot creates its own figure (sized
# via figsize), so no plt.figure() call precedes it -- one would only leave an
# extra empty figure behind.
data.plot(kind='box', subplots=True, layout=(4, 4), sharex=False, sharey=False, figsize=(15, 10), patch_artist=True)
plt.suptitle('Box Plots of Continuous Features')
plt.show()

# Correlation matrix to explore pairwise relationships between features
plt.figure(figsize=(10, 8))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()

# Count plots for the categorical features.
# NOTE: these column names are assumed to exist in heart.csv.
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

for feature in categorical_features:
    plt.figure(figsize=(8, 4))
    # Pass the column by keyword: the first positional parameter of countplot is
    # `data` on current seaborn releases, so a positional Series is not read as x.
    sns.countplot(x=feature, data=data, palette='Set2')
    plt.title(f'Count Plot for {feature}')
    plt.xlabel(feature)
    plt.ylabel('Count')
    plt.show()


In [None]:
#Use these methods to normalize the following group of data: 200,300,400,600,1000  
#a. min-max normalization by setting min = 0 and max = 1  
#b. z-score normalization  
#c. z-score normalization using the mean absolute deviation instead of the standard deviation
#d. normalization by decimal scaling

import numpy as np

# Original data
data = np.array([200, 300, 400, 600, 1000])

# a. Min-Max Normalization (min = 0, max = 1):
#    v' = (v - min) / (max - min), mapping the smallest value to 0 and the largest to 1.
min_val = np.min(data)
max_val = np.max(data)
min_max_normalized = (data - min_val) / (max_val - min_val)

# b. Z-Score Normalization: v' = (v - mean) / std.
#    np.std defaults to ddof=0, i.e. the population standard deviation.
mean = np.mean(data)
std_dev = np.std(data)
z_score_normalized = (data - mean) / std_dev

# c. Z-Score Normalization using the Mean Absolute Deviation in place of the
#    standard deviation: v' = (v - mean) / mean(|v - mean|).
mad = np.mean(np.abs(data - mean))
z_score_mad_normalized = (data - mean) / mad

# d. Normalization by Decimal Scaling: v' = v / 10**j, where j is the smallest
#    integer such that max(|v'|) < 1 (strictly). floor(log10(max)) + 1 is used
#    rather than ceil(log10(max)): the two agree except when the maximum is an
#    exact power of ten (as here, 1000), where ceil would give j = 3 and leave
#    the largest value at exactly 1.0 instead of below 1.
j = int(np.floor(np.log10(np.max(np.abs(data))))) + 1
decimal_scaling_normalized = data / (10**j)

# Display results
print("Original Data: ", data)
print("\nMin-Max Normalized Data: ", min_max_normalized)
print("\nZ-Score Normalized Data: ", z_score_normalized)
print("\nZ-Score (MAD) Normalized Data: ", z_score_mad_normalized)
print("\nDecimal Scaling Normalized Data: ", decimal_scaling_normalized)


In [None]:
#For the "Plant Growth Data Classification" dataset, the prediction task typically involves predicting or classifying the growth milestone of plants based on the provided 
#environmental and management factors. Specifically, the aim is to predict the growth stage or milestone that a plant reaches based on variables such as soil type, 
#sunlight hours, water frequency, fertilizer type, temperature, and humidity. This 
#prediction can help in understanding how different conditions influence plant growth and can be valuable for optimizing agricultural practices or greenhouse management. 
#Perform classification of the dataset using a deep neural network. 

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset; 'plant_growth.csv' must be in the working directory.
data = pd.read_csv('plant_growth.csv')

# Separate features and labels.
# NOTE: 'growth_stage' is assumed to be the label column; rename it here if the CSV differs.
X = data.drop('growth_stage', axis=1)
y = data['growth_stage']

# One-hot encode any categorical feature columns (e.g. soil type, fertilizer
# type) so StandardScaler receives numeric input only. Numeric columns pass
# through get_dummies unchanged.
X = pd.get_dummies(X, dtype=float)

# Map the label values to integer codes 0..k-1; class_names[i] is the original
# label behind code i.
y, class_names = pd.factorize(y)
num_classes = len(class_names)

# Split the dataset into training and testing sets (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: fit on the training rows only, then apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the target labels. num_classes is passed explicitly so both
# splits get the same width even if one of them lacks the highest class code.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Build the Deep Neural Network
model = Sequential()

# Input layer: one input per (encoded) feature column
model.add(Dense(64, input_shape=(X_train.shape[1],), activation='relu'))

# Hidden layers
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))

# Output layer: one softmax unit per growth-stage class
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE(review): the test split doubles as the validation set here, so the
# per-epoch validation numbers are not an independent estimate.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc}")

# Convert softmax outputs and one-hot targets back to integer class codes
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Classification report and accuracy score. `labels` pins the report to every
# known class (target_names must line up with the labels being reported, which
# fails if a class is absent from the test split), and the names are converted
# to str because the report measures their length when formatting.
print("\nClassification Report:")
print(classification_report(
    y_true_classes,
    y_pred_classes,
    labels=np.arange(num_classes),
    target_names=[str(name) for name in class_names],
))
print(f"\nAccuracy Score: {accuracy_score(y_true_classes, y_pred_classes)}")


In [None]:
#Predict the student grades using Linear Regression, Random Forest, Support Vector Machine.

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Read the student records from disk.
# Assuming the dataset has columns like 'study_hours', 'attendance', 'previous_grades', etc.
data = pd.read_csv('student_grades.csv')

# The label is 'final_grade'; every remaining column is used as a feature.
y = data['final_grade']
X = data.drop(columns='final_grade')

# 80/20 train/test partition with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardized copies of the features (statistics learned from the training
# rows only); these are consumed by the SVR below.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 1. Ordinary least-squares regression on the raw features.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

# 2. Random forest of 100 trees (seeded) on the raw features.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# 3. Linear-kernel support vector regressor on the standardized features.
svr_model = SVR(kernel='linear').fit(X_train_scaled, y_train)
y_pred_svr = svr_model.predict(X_test_scaled)

# Evaluate the models
def evaluate_model(y_true, y_pred, model_name):
    """Print the mean squared error and R^2 score for one model's predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        model_name: Label used in the printed header line.

    Returns:
        None. The metrics are written to stdout, rounded to two decimals.
    """
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"\n{model_name} Performance:")
    print(f"Mean Squared Error: {mse:.2f}")
    # The superscript two is written as an escape so the label cannot be
    # corrupted again by a file re-encoding (it previously read "RÂ²").
    print(f"R\u00b2 Score: {r2:.2f}")

# Report test-set metrics for each fitted model, in the order they were trained:
# linear regression, random forest, then the support vector regressor.
for predictions, label in (
    (y_pred_lr, "Linear Regression"),
    (y_pred_rf, "Random Forest"),
    (y_pred_svr, "Support Vector Machine"),
):
    evaluate_model(y_test, predictions, label)


