In [1]:
import pandas as pd

# Load the dataset (path is relative to the notebook's working directory)
data = pd.read_csv('Alphabets_data.csv')

# Display the first few rows of the dataset
print(data.head())

# Get a summary of the dataset.
# DataFrame.info() writes its report directly and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
data.info()

# Check for missing values (count of nulls per column)
print(data.isnull().sum())


  letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0      T     2     8      3       5      1     8    13      0      6      6   
1      I     5    12      3       7      2    10     5      5      4     13   
2      D     4    11      6       8      6    10     6      2      6     10   
3      N     7    11      6       6      3     5     9      4      6      4   
4      G     2     1      3       1      1     8     6      6      6      6   

   x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
0      10       8      0       8      0       8  
1       3       9      2       8      4      10  
2       3       7      3       7      3       9  
3       4      10      6      10      2       8  
4       5       9      1       7      5      10  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbo

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Separate features and target variable
X = data.drop('letter', axis=1)  # Features: every column except the 'letter' label
y = data['letter']  # Target: the letter each row represents

# Split the raw data into training and testing sets *before* scaling, so the
# scaler's statistics are computed from the training rows only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize the features: fit on the training split only, then apply the same
# transformation to the test split. Fitting on the full dataset would leak
# test-set means/variances into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept so any cell that still refers to
# X_scaled keeps working. It uses the train-fitted scaler.
X_scaled = scaler.transform(X)


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Local import keeps this cell self-contained; LabelEncoder comes from the same
# scikit-learn module the notebook already uses for StandardScaler.
from sklearn.preprocessing import LabelEncoder

# to_categorical expects integer class indices, but the target column holds
# letter strings (e.g. 'T', 'I', 'D'). Map each letter to an integer index first.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
num_classes = len(label_encoder.classes_)

# Convert the integer labels to one-hot encoding. Passing num_classes explicitly
# guarantees the train and test matrices have the same number of columns.
y_train_onehot = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_onehot = to_categorical(y_test_encoded, num_classes=num_classes)

# Build a basic ANN model
model = Sequential()

# Add an input layer and a hidden layer
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))  # Input layer + first hidden layer

# Add an output layer (softmax for multi-class classification), one unit per class
model.add(Dense(y_train_onehot.shape[1], activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model.
# NOTE(review): the test split doubles as validation data here, so the reported
# validation metrics are not independent of the final test evaluation.
history = model.fit(X_train, y_train_onehot, epochs=20, batch_size=32, validation_data=(X_test, y_test_onehot))


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Define the model creation function for grid search
def create_model(hidden_layers=1, neurons=128, activation='relu', learning_rate=0.001):
    """Build and compile a fully connected classifier for the grid search.

    Reads the input width from the notebook-level ``X_train`` and the number of
    output classes from ``y_train_onehot``, so both must exist before calling.

    Args:
        hidden_layers: Total number of hidden Dense layers (the first one also
            declares the input dimension).
        neurons: Number of units in each hidden layer.
        activation: Activation function used by every hidden layer.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with a softmax output layer.
    """
    # Imported locally so the function does not depend on an import in another cell.
    from tensorflow.keras.optimizers import Adam

    model = Sequential()
    model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation))

    # Add additional hidden layers if specified (the first one is added above)
    for _ in range(hidden_layers - 1):
        model.add(Dense(neurons, activation=activation))

    model.add(Dense(y_train_onehot.shape[1], activation='softmax'))

    # Build the optimizer explicitly: the string 'adam' always uses the default
    # step size, which would make the learning_rate argument a no-op.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['accuracy'],
    )

    return model

# Candidate hyperparameter values: 3 * 3 * 2 * 2 * 2 * 2 = 144 combinations,
# each fitted once per fold by the search below.
param_grid = dict(
    hidden_layers=[1, 2, 3],
    neurons=[64, 128, 256],
    activation=['relu', 'tanh'],
    batch_size=[32, 64],
    epochs=[20, 50],
    learning_rate=[0.001, 0.01],
)

# Expose the Keras model factory through a scikit-learn style estimator
model = KerasClassifier(build_fn=create_model, verbose=0)

# 3-fold cross-validated search over the grid; n_jobs=-1 requests parallel workers
grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1)
grid_result = grid.fit(X_train, y_train_onehot)

# Report the best score found and the parameters that produced it
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Get the best model from grid search
best_model = grid_result.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# predict() may return a 1-D array of class indices or a 2-D array with one
# column per class. Only reduce with argmax in the 2-D case; calling
# argmax(axis=1) on a 1-D array raises an axis error.
y_pred_class = y_pred.argmax(axis=1) if y_pred.ndim > 1 else y_pred

# The model was trained on one-hot targets, so its predictions are class
# indices. Recover the matching indices from the one-hot test targets instead
# of comparing against the raw letter strings in y_test, which never match.
y_test_class = y_test_onehot.argmax(axis=1)

# Evaluate model performance ('weighted' averages per-class scores by support)
accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class, average='weighted')
recall = recall_score(y_test_class, y_pred_class, average='weighted')
f1 = f1_score(y_test_class, y_pred_class, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")
