<a href="https://colab.research.google.com/github/gks2022004/INDRA_MODEL/blob/main/Tekathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

data = pd.read_csv('/content/cloud burst.csv', encoding='ISO-8859-1')

In [3]:
# Create a binary target variable based on precipitation
data['cloud_burst'] = (data['precipitation (mm)'] > 6).astype(int)

# Select relevant features (you can add more features as needed)
features = ['temperature_2m (°C)', 'relativehumidity_2m (%)', 'pressure_msl (hPa)']

In [4]:
# Normalize the input features
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])

# Determine the sequence length
sequence_length = 10  # Adjust as needed

In [5]:
# Sequencing the data
sequences = []
targets = []

for i in range(len(data) - sequence_length):
    sequences.append(data[features].iloc[i:i+sequence_length].values)
    targets.append(data['cloud_burst'].iloc[i+sequence_length])

X = np.array(sequences)
y = np.array(targets)

In [6]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the RNN model with a sigmoid activation in the output layer
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, activation='relu', input_shape=(sequence_length, len(features))),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Use sigmoid activation for probability
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [7]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

# Make probability predictions
probabilities = model.predict(X_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: nan, Test Accuracy: 0.9873


In [8]:
# Assuming you want to save the model as "cloud_burst_model.h5"
model.save("/content/cloud_burst_trained_model")

Testing the model

In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import json

# Load the test data
test_data = pd.read_csv('/content/Test data 2.csv',  encoding='ISO-8859-1')

In [10]:
# Preprocess the test data (similar to training data preprocessing)
test_data['cloud_burst'] = (test_data['precipitation (mm)'] > 6).astype(int)
test_features = ['temperature_2m (°C)', 'relativehumidity_2m (%)', 'pressure_msl (hPa)']

scaler = MinMaxScaler()
test_data[test_features] = scaler.fit_transform(test_data[test_features])


In [11]:
sequence_length = 10  # Should be the same as used during training

test_sequences = []
test_targets = []

for i in range(len(test_data) - sequence_length):
    test_sequences.append(test_data[test_features].iloc[i:i+sequence_length].values)
    test_targets.append(test_data['cloud_burst'].iloc[i+sequence_length])

X_test = np.array(test_sequences)
y_test = np.array(test_targets)

In [12]:
# Load the trained model (assuming you've already trained and saved it)
model = tf.keras.models.load_model('cloud_burst_trained_model')  # Replace with the actual model filename

# Make probability predictions on the test data
test_probabilities = model.predict(X_test)

# Calculate and print the accuracy on the test data
accuracy = model.evaluate(X_test, y_test)[1] * 100
print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 100.00%


In [13]:
# Assuming you want to evaluate the model's performance on the test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

Test Loss: nan, Test Accuracy: 1.0000


In [14]:
# Convert the results into a list of dictionaries
results = []
for i in range(len(test_data) - sequence_length):
    result_dict = {
        "input_data": test_data[test_features].iloc[i+sequence_length].to_dict(),
        "predicted_probability": float(test_probabilities[i][0])
    }
    results.append(result_dict)

In [15]:
# Convert the list of dictionaries to JSON
json_data = json.dumps(results, indent=2)

# Print or use the JSON data as needed
print(json_data)

[
  {
    "input_data": {
      "temperature_2m (\u00b0C)": 0.6748768472906402,
      "relativehumidity_2m (%)": 0.22988505747126436,
      "pressure_msl (hPa)": 0.8623853211009163
    },
    "predicted_probability": NaN
  },
  {
    "input_data": {
      "temperature_2m (\u00b0C)": 0.7389162561576351,
      "relativehumidity_2m (%)": 0.1839080459770115,
      "pressure_msl (hPa)": 0.8073394495412884
    },
    "predicted_probability": NaN
  },
  {
    "input_data": {
      "temperature_2m (\u00b0C)": 0.7881773399014776,
      "relativehumidity_2m (%)": 0.16091954022988508,
      "pressure_msl (hPa)": 0.7706422018348604
    },
    "predicted_probability": NaN
  },
  {
    "input_data": {
      "temperature_2m (\u00b0C)": 0.8029556650246304,
      "relativehumidity_2m (%)": 0.1724137931034483,
      "pressure_msl (hPa)": 0.6880733944954187
    },
    "predicted_probability": NaN
  },
  {
    "input_data": {
      "temperature_2m (\u00b0C)": 0.7980295566502461,
      "relativehumidity_2m

In [16]:
import numpy as np

# Assuming input_data is a list of sequences
input_data = [
    [0.54679802955665, 0.44827586206896547, 0.7798165137614745],
    # Add more sequences here...
]

# Convert input_data to a NumPy array and reshape it
input_array = np.array(input_data)  # Shape: (num_sequences, num_features)
input_array = input_array.reshape((1, input_array.shape[0], input_array.shape[1]))  # Add batch size (1 in this example)

# Make predictions using the model
predicted_probabilities = model.predict(input_array)

# Print the predicted probabilities
print("Predicted Probabilities:", predicted_probabilities)


Predicted Probabilities: [[nan]]
