In [None]:
# Importing the necessary modules
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.preprocessing import LabelEncoder
import os
import cv2


In [None]:
# -----------------------------------------------------------------------------------
# INPUT: list the PNG file(s) whose text should be recognised and analysed.
# Every path in this list is read by the character-recognition cell further down.
# -----------------------------------------------------------------------------------
test_image_paths = [
    r'C:\Users\ethan\Downloads\target_images\target_images\line_1.png',
]

In [None]:
# Loading the CSV file that maps every image filename ('file') to its character ('label')
labels_df = pd.read_csv(r'C:\Users\ethan\Downloads\alphabets_dataset\alphabet_labels.csv')
image_dir = r'C:\Users\ethan\Downloads\alphabets_dataset\alphabet_images'

images = []
labels = []

# Walk the two columns directly instead of iterrows(), which builds a Series per row.
for filename, label in zip(labels_df['file'], labels_df['label']):
    img = load_img(os.path.join(image_dir, filename), color_mode='grayscale', target_size=(28, 28))
    images.append(img_to_array(img))
    labels.append(label)

images = np.array(images) / 255.0  # Normalizing pixel values between 0 and 1
labels = np.array(labels)

# Encoding string labels to integers (class ids 0..K-1, in sorted label order)
label_Encoder = LabelEncoder()
labels = label_Encoder.fit_transform(labels)

# Checking for class imbalances
unique, counts = np.unique(labels, return_counts=True)

# 'balanced' weights are n_samples / (n_classes * count): the same relative
# weighting as 1 / count, but normalised so the loss keeps its usual scale
# instead of being shrunk by roughly the per-class sample count.
balanced_weights = class_weight.compute_class_weight(class_weight='balanced', classes=unique, y=labels)
class_weights = {int(class_id): float(weight) for class_id, weight in zip(unique, balanced_weights)}

# Splitting data for training and testing
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Reshaping data for the CNN input (batch, height, width, channels)
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))









In [None]:
# Creating a TF dataset
def create_dataset(images, labels, batch_size=32, shuffle=True, shuffle_buffer_size=1024):
    """Wrap in-memory samples in a batched, prefetching ``tf.data.Dataset``.

    Args:
        images: Array-like of samples; sliced along its first axis.
        labels: Array-like of targets, aligned with ``images`` along the first axis.
        batch_size: Number of samples per emitted batch.
        shuffle: When True, shuffle the samples before batching.
        shuffle_buffer_size: Size of the shuffle buffer. A buffer smaller than the
            dataset only shuffles locally; pass ``len(images)`` (or more) for a
            uniform shuffle over the whole dataset.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.batch(batch_size)
    # Let tf.data pick the prefetch depth.
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

# Training batches are shuffled; the held-out batches keep their order.
train_dataset = create_dataset(images=x_train, labels=y_train, shuffle=True)
test_dataset = create_dataset(images=x_test, labels=y_test, shuffle=False)

In [None]:
# Building the model for character identification
# The fitted encoder is the source of truth for the label set, so the output
# layer size does not depend on whatever the global name `labels` currently holds.
num_classes = len(label_Encoder.classes_)

# Three conv/pool stages followed by a small dense classifier head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

model.summary()

# Compiling the model (integer class ids -> sparse categorical cross-entropy)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Learning rate scheduler
# min_lr must be below the initial learning rate (0.001); when the two are equal
# the callback can never lower the rate and is effectively a no-op.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                                 patience=3, min_lr=1e-5)

# Training the model with the dataset
history = model.fit(train_dataset,
                    epochs=20,
                    validation_data=test_dataset,
                    callbacks=[reduce_lr],
                    class_weight=class_weights)


# Evaluate on test data
# NOTE: test_dataset is also the validation set that drives the LR schedule above,
# so this accuracy is not measured on fully unseen data.
test_loss, test_acc = model.evaluate(test_dataset)
print(f'Test accuracy: {test_acc}')


In [None]:

# Verifying image paths
import os




# Loading and preprocessing a test image
def preprocess_test_image(img_path):
    """Read the image at ``img_path`` as a single-channel grayscale array.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The array returned by ``cv2.imread`` in grayscale mode.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for the path.
    """
    grayscale = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if grayscale is not None:
        return grayscale
    raise ValueError(f"Image at path {img_path} could not be loaded.")

# Slicing the test image into individual characters
def slice_image_into_characters(img, char_width=28, char_height=28, space_threshold=5):
    """Cut a grayscale image into fixed-size tiles, row by row, left to right.

    A tile whose mean pixel intensity is below ``space_threshold`` is treated as
    a blank (space); every other tile is scaled to [0, 1] and reshaped to
    ``(char_height, char_width, 1)``. Tiles at the right/bottom edge that are
    smaller than the full tile size are dropped.

    Args:
        img: 2-D array of pixel intensities (unpacked as ``h, w = img.shape``).
        char_width: Tile width in pixels.
        char_height: Tile height in pixels.
        space_threshold: Mean-intensity cut-off below which a tile counts as a space.

    Returns:
        A ``(characters, L)`` tuple of equal-length lists:
        ``characters`` holds ``' '`` for blank tiles and the normalised tile
        array otherwise; ``L`` holds ``' '`` for blank tiles and ``'X'`` for
        tiles that contain a character.
    """
    characters = []
    L = []
    h, w = img.shape
    for y in range(0, h, char_height):
        for x in range(0, w, char_width):
            char = img[y:y + char_height, x:x + char_width]
            if char.shape != (char_height, char_width):
                # Incomplete tile at the image border: skip it.
                continue
            if np.mean(char) < space_threshold:
                # Very low mean intensity is taken to be an empty cell.
                characters.append(' ')
                L.append(' ')
            else:
                normalised = (char / 255.0).reshape((char_height, char_width, 1))
                characters.append(normalised)
                L.append('X')

    return characters, L





# Process test images and spaces
all_characters_processed = []
# L marks every tile of every input image, in reading order:
# 'X' where a character was found and ' ' where the tile is blank.
# It is accumulated across images so it always lines up with the predictions.
L = []

for path in test_image_paths:
    try:
        img = preprocess_test_image(path)
        characters, image_layout = slice_image_into_characters(img)
    except ValueError as e:
        # Report the unreadable image and carry on with the remaining paths.
        print(e)
        continue
    all_characters_processed.extend(characters)
    L.extend(image_layout)

# Separate images and spaces: only the tile arrays go through the model
images_to_predict = [char for char in all_characters_processed if isinstance(char, np.ndarray)]

# Convert images to numpy array
all_characters = np.array(images_to_predict)
statement = ''


# Map predictions to characters
def get_character_from_prediction(prediction):
    """Return the label whose class id has the highest predicted probability.

    The label is looked up in ``label_Encoder.classes_`` so the mapping always
    matches the encoding the model was trained with.
    """
    return str(label_Encoder.classes_[np.argmax(prediction)])


# Ensure the model is available as model
if all_characters.shape[0] > 0:
    # Make predictions
    predictions = model.predict(all_characters)
    predicted_characters = [get_character_from_prediction(pred) for pred in predictions]
    printed_list = ''.join(predicted_characters)

    # Rebuild the text: blanks stay blanks, every 'X' marker consumes the next
    # predicted character. There is exactly one prediction per 'X' marker.
    remaining_predictions = iter(predicted_characters)
    statement = ''.join(
        ' ' if marker == ' ' else next(remaining_predictions)
        for marker in L
    )
else:
    print("No characters to predict.")



In [None]:


# Loading data from CSV file
df = pd.read_csv(r'C:\Users\ethan\Downloads\sentiment_analysis_dataset.csv')


sentences = df['line'].str.lower().tolist()

# Converting labels to numerical values
# Sentiment-specific names are used below so this cell does not rebind the
# `labels`, `class_weights` and `y_train` globals of the character model.
label_dict = {'Angry': 0, 'Happy': 1, 'Neutral': 2}
sentiment_labels = np.array([label_dict[label] for label in df['sentiment']])

# Inspecting the class distribution (imbalance is handled via class weights below)
class_counts = pd.Series(sentiment_labels).value_counts()
print("Class distribution before balancing:", class_counts)

# Tokenizing sentences
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
padded_sequences = pad_sequences(sequences, maxlen=50, padding='post', truncating='post')

# Calculating class weights
# Key each weight by its actual class id; enumerate() would mis-key the weights
# if one of the classes were absent from the data.
sentiment_classes = np.unique(sentiment_labels)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=sentiment_classes, y=sentiment_labels)
sentiment_class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(sentiment_classes, balanced_weights)
}

# Splitting data into training and validation sets (stratified by sentiment)
X_train_seq, X_val_seq, y_train_sent, y_val_sent = train_test_split(
    padded_sequences, sentiment_labels, test_size=0.2, random_state=42, stratify=sentiment_labels)

# Building the model for sentiment analysis
model2 = tf.keras.Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=50),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')
])

#Compiling the model
model2.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping callback: stop after 3 epochs without val_loss improvement
# and roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Training the model
model2.fit(X_train_seq, y_train_sent, epochs=20, batch_size=32,
           validation_data=(X_val_seq, y_val_sent),
           class_weight=sentiment_class_weights, callbacks=[early_stopping], verbose=1)

model2.summary()




In [None]:
# Sentiment Prediction

# Class names ordered by their numeric id, taken from the training label mapping
# so the two can never drift apart.
sentiments = sorted(label_dict, key=label_dict.get)

test_sentences = [statement.lower()]

if not test_sentences[0].strip():
    # Nothing was recognised in the image(s); a prediction on an all-padding
    # sequence would be meaningless, so skip it.
    print("No recognised text to analyse.")
else:
    # Same tokenizer and padding settings as used for training.
    test_sequences = tokenizer.texts_to_sequences(test_sentences)
    padded_test_sequences = pad_sequences(test_sequences, maxlen=50, padding='post', truncating='post')
    predictions = model2.predict(padded_test_sequences)

    for prediction in predictions:
        print(sentiments[np.argmax(prediction)])
    