### Model Analysis:

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Set the path to your Kaggle dataset
train_data_dir = '///Users/chsmac/Downloads/dataset'

# Define parameters
img_height, img_width = 128, 128
batch_size = 32
epochs = 10

# Use data augmentation to enhance the training dataset
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Split 20% of the data for validation
)

# Load the training dataset
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Use the training subset
)

# Load the validation dataset
validation_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # Use the validation subset
)

# Define the CNN model
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(len(train_generator.class_indices), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size
)

# Save the trained model
model.save('weather_image_recognition_model.h5')


### Model Evaluation:

In [None]:
import matplotlib.pyplot as plt

# Evaluate the model on the validation set
evaluation_results = model.evaluate(validation_generator)

# Print the evaluation results
print("Evaluation Result - Loss: {:.4f}, Accuracy: {:.4f}".format(evaluation_results[0], evaluation_results[1]))


# Plot the training and validation accuracy curves
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy Curves')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()


In [None]:
pip install tensorflow


In [None]:
import pymongo
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np
import os

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017")

# Create or select a database
db = client["weather_recognition_db"]

# Create or select a collection
collection = db["weather_images"]

# Load the trained model
model = load_model('weather_image_recognition_model.h5')

# Define a function to predict weather from an image
def predict_weather(image_path):
    img = image.load_img(image_path, target_size=(128, 128))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) / 255.0  # Normalize the image
    prediction = model.predict(img_array)
    predicted_class = np.argmax(prediction)
    return predicted_class

# Specify the directory containing weather images
images_directory = '///Users/chsmac/Downloads/dataset'

# Iterate over images in the directory and store predictions in MongoDB
for image_filename in os.listdir(images_directory):
    if image_filename.endswith(".jpg"):
        image_path = os.path.join(images_directory, image_filename)
        predicted_weather = predict_weather(image_path)

        # Insert data into MongoDB
        image_data = {
            "filename": image_filename,
            "path": image_path,
            "predicted_weather_class": int(predicted_weather),
            "predicted_weather_label": "sunny" if predicted_weather == 0 else "cloudy" if predicted_weather == 1 else "rainy"
        }
        collection.insert_one(image_data)

# Close the MongoDB connection
client.close()

#print('jdj')

In [None]:
pip install pymongo


In [None]:
pip install pyspark


In [None]:
!pip install opencv-python

In [3]:
import os
import cv2
import numpy as np
from pyspark.sql import SparkSession, Row
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.feature import VectorAssembler
from pyspark.ml import Pipeline
from pyspark.sql.functions import udf, col
from pyspark.sql.types import FloatType, ArrayType, StringType
from pyspark.ml.linalg import Vectors

# Spark Session
spark = SparkSession.builder.appName("ImageClassification").getOrCreate()

#preprocess images
def process_image(file_path):
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
    img = cv2.resize(img, (32, 32))  # Resize
    img = img.flatten() / 255.0  # Normalize
    return img.tolist()

#load and process images from subdirectories
def load_and_process_images(base_directory):
    categories = ["dew", "fogsmog", "frost","glaze","hail","lightning","rain","rainbow","rime","sandstorm","snow"]  # Subfolder names
    image_data = []

    for category in categories:
        dir_path = os.path.join(base_directory, category)
        label = categories.index(category)  # Assign a numerical label based on category

        for file in os.listdir(dir_path):
            if file.endswith(('jpg', 'png', 'jpeg')):
                file_path = os.path.join(dir_path, file)
                processed_img = process_image(file_path)
                image_data.append((label, Vectors.dense(processed_img)))

    
    return spark.createDataFrame(image_data, ["label", "features"])

#images
base_directory = "/Users/chsmac/Desktop/DATA 603 Project/dataset"

image_df = load_and_process_images(base_directory)

#training and test sets
train_df, test_df = image_df.randomSplit([0.6, 0.4])

rf = RandomForestClassifier(labelCol="label", featuresCol="features", numTrees=10)
pipeline = Pipeline(stages=[rf])

model = pipeline.fit(train_df)

predictions = model.transform(test_df)

# Evaluate
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
evaluator = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions)
print(f"Test Accuracy = {accuracy}")

error: OpenCV(4.8.1) /Users/xperience/GHA-OpenCV-Python/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/resize.cpp:4062: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


In [None]:
import matplotlib.pyplot as plt
from pyspark.sql.functions import when

predictions_with_accuracy = predictions.withColumn('is_correct', when(predictions.label == predictions.prediction, 1).otherwise(0))


correct_predictions = predictions_with_accuracy.filter(predictions_with_accuracy.is_correct == 1).count()
incorrect_predictions = predictions_with_accuracy.filter(predictions_with_accuracy.is_correct == 0).count()

print(f"Correctly identified images: {correct_predictions}")
print(f"Incorrectly identified images: {incorrect_predictions}")


pandas_df = predictions_with_accuracy.toPandas()


plt.figure(figsize=(10, 5))


plt.subplot(1, 2, 1)
plt.bar(['Correct', 'Incorrect'], [correct_predictions, incorrect_predictions], color=['green', 'red'])
plt.title('Classification Results')
plt.xlabel('Type')
plt.ylabel('Count')

#confusion matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(pandas_df['label'], pandas_df['prediction'])
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues, ax=plt.subplot(1, 2, 2))
plt.title('Confusion Matrix')

plt.tight_layout()
plt.show()

In [None]:

test_base_directory = "/Users/kaushikmanjunatha/Downloads/data_animal/Test"


def extract_labels_from_filenames(filename):
    # Assuming your file names have labels separated by underscores (e.g., "cat_001.jpg")
    label = filename.split("_")[0]
    return label

def load_and_process_test_images(base_directory):
    image_data = []

    for file in os.listdir(base_directory):
        if file.endswith(('jpg', 'png', 'jpeg')):
            file_path = os.path.join(base_directory, file)
            processed_img = process_image(file_path)
            label = extract_labels_from_filenames(file)  # Extract label from file name
            image_data.append((label, Vectors.dense(processed_img)))


    return spark.createDataFrame(image_data, ["true_category", "features"])


test_df = load_and_process_test_images(test_base_directory)

test_predictions = model.transform(test_df)


test_predictions.select("true_category", "prediction").show()

