In [1]:
!pip install tensorflow numpy pandas matplotlib




In [2]:
import pandas as pd
import numpy as np

# Crop List
crops = ['Rice', 'Sugarcane', 'Banana', 'Cotton', 'Groundnut']

# Create DataFrame to store the parameters
data = {
    'Crop': [],
    'Kc_Germination': [],
    'Kc_Initial': [],
    'Kc_Mid_Season': [],
    'Kc_Late_Season': [],
    'Root_Depth_Germination': [],
    'Root_Depth_Initial': [],
    'Root_Depth_Mid_Season': [],
    'Root_Depth_Late_Season': [],
    'Critical_Depletion_Germination': [],
    'Critical_Depletion_Initial': [],
    'Critical_Depletion_Mid_Season': [],
    'Critical_Depletion_Late_Season': [],
    'Growing_Season_Start': [],
    'Growing_Season_End': []
}

# Generate synthetic data (values are in expected ranges)
for crop in crops:
    data['Crop'].append(crop)
    data['Kc_Germination'].append(np.random.uniform(0.3, 0.5))
    data['Kc_Initial'].append(np.random.uniform(0.5, 0.7))
    data['Kc_Mid_Season'].append(np.random.uniform(0.7, 1.2))
    data['Kc_Late_Season'].append(np.random.uniform(0.6, 1.0))

    data['Root_Depth_Germination'].append(np.random.uniform(10, 20))
    data['Root_Depth_Initial'].append(np.random.uniform(20, 40))
    data['Root_Depth_Mid_Season'].append(np.random.uniform(40, 70))
    data['Root_Depth_Late_Season'].append(np.random.uniform(70, 100))

    data['Critical_Depletion_Germination'].append(np.random.uniform(40, 60))
    data['Critical_Depletion_Initial'].append(np.random.uniform(30, 50))
    data['Critical_Depletion_Mid_Season'].append(np.random.uniform(20, 40))
    data['Critical_Depletion_Late_Season'].append(np.random.uniform(10, 30))

    data['Growing_Season_Start'].append('2023-06-01')  # This will depend on specific crop
    data['Growing_Season_End'].append('2023-11-01')

# Create a DataFrame
df = pd.DataFrame(data)
df.head()


Unnamed: 0,Crop,Kc_Germination,Kc_Initial,Kc_Mid_Season,Kc_Late_Season,Root_Depth_Germination,Root_Depth_Initial,Root_Depth_Mid_Season,Root_Depth_Late_Season,Critical_Depletion_Germination,Critical_Depletion_Initial,Critical_Depletion_Mid_Season,Critical_Depletion_Late_Season,Growing_Season_Start,Growing_Season_End
0,Rice,0.396946,0.690916,1.164972,0.827453,18.171381,23.417834,46.606114,93.900936,57.22999,31.40677,25.273492,16.760016,2023-06-01,2023-11-01
1,Sugarcane,0.370619,0.592813,0.791128,0.876357,16.174263,29.956782,56.208394,77.232105,42.81567,49.866252,32.775127,13.208487,2023-06-01,2023-11-01
2,Banana,0.463633,0.662877,1.113004,0.721839,11.333457,22.040979,62.44854,86.148139,51.902203,39.151845,37.627988,19.937238,2023-06-01,2023-11-01
3,Cotton,0.446258,0.516929,0.769907,0.985029,19.146186,35.292108,59.392564,98.67967,59.352283,43.646967,33.077151,27.935243,2023-06-01,2023-11-01
4,Groundnut,0.40615,0.501508,1.156499,0.604914,13.578469,33.870163,47.870377,96.377969,58.35882,45.499377,21.57724,15.951032,2023-06-01,2023-11-01


In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_generator():
    model = models.Sequential()
    model.add(layers.Dense(128, input_dim=100, activation='relu'))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(13, activation='linear'))  # 13 output features
    return model


In [4]:
def build_discriminator():
    model = models.Sequential()
    model.add(layers.Dense(512, input_dim=13, activation='relu'))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))  # Output: real or fake
    return model


In [5]:
def build_gan(generator, discriminator):
    discriminator.trainable = False
    model = models.Sequential()
    model.add(generator)
    model.add(discriminator)
    return model


In [7]:
# Separate numeric and non-numeric columns
numeric_columns = ['Kc_Germination', 'Kc_Initial', 'Kc_Mid_Season', 'Kc_Late_Season',
                   'Root_Depth_Germination', 'Root_Depth_Initial', 'Root_Depth_Mid_Season',
                   'Root_Depth_Late_Season', 'Critical_Depletion_Germination',
                   'Critical_Depletion_Initial', 'Critical_Depletion_Mid_Season',
                   'Critical_Depletion_Late_Season']

non_numeric_columns = ['Crop', 'Growing_Season_Start', 'Growing_Season_End']

# Now, you can separate the numeric and non-numeric data
df_numeric = df[numeric_columns].values.astype(np.float32)
df_non_numeric = df[non_numeric_columns]

# Check the shapes
print(df_numeric.shape)  # Should contain only numeric values
print(df_non_numeric.head())  # Should contain only non-numeric values



(5, 12)
        Crop Growing_Season_Start Growing_Season_End
0       Rice           2023-06-01         2023-11-01
1  Sugarcane           2023-06-01         2023-11-01
2     Banana           2023-06-01         2023-11-01
3     Cotton           2023-06-01         2023-11-01
4  Groundnut           2023-06-01         2023-11-01


In [8]:
# Training loop with numeric data
for epoch in range(epochs):
    # Train Discriminator
    idx = np.random.randint(0, df_numeric.shape[0], batch_size)
    real_data = df_numeric[idx]  # Now using the numeric data
    noise = np.random.normal(0, 1, (batch_size, z_dim))
    fake_data = generator.predict(noise)

    # Label smoothing (real = 0.9 to help stabilize training)
    real_labels = np.ones((batch_size, 1)) * 0.9
    fake_labels = np.zeros((batch_size, 1))

    # Train the discriminator
    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator
    noise = np.random.normal(0, 1, (batch_size, z_dim))
    valid_labels = np.ones((batch_size, 1))
    g_loss = gan.train_on_batch(noise, valid_labels)

    if epoch % 1 == 0:  # Log the loss for every epoch
        print(f"{epoch+1} [D loss: {d_loss[0]}] [G loss: {g_loss}]")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_4" is incompatible with the layer: expected axis -1 of input shape to have value 13, but received input with shape (32, 12)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 12), dtype=float32)
  • training=True
  • mask=None

In [9]:
def build_generator():
    model = tf.keras.Sequential()
    model.add(Dense(128, activation='relu', input_dim=z_dim))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(12, activation='tanh'))  # 12 output features for numeric data
    return model

def build_discriminator():
    model = tf.keras.Sequential()
    model.add(Dense(1024, activation='relu', input_dim=12))  # Expect 12 features
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification
    return model


In [11]:
# One-hot encode the Crop column
crop_one_hot = pd.get_dummies(df['Crop'])

# Convert df_numeric (NumPy array) to DataFrame
df_numeric_df = pd.DataFrame(df_numeric, columns=numeric_columns)

# Combine the one-hot encoded crop data with the numeric data
df_with_crop = pd.concat([df_numeric_df, crop_one_hot], axis=1)

# Check the shape of the new data (should now have 13 columns)
print(df_with_crop.shape)


(5, 17)


In [17]:
# First, create the models by calling the functions
discriminator = build_discriminator()  # Instantiate the discriminator model
generator = build_generator()  # Instantiate the generator model

# Compile the discriminator
discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Create the GAN model (stack the generator and discriminator)
discriminator.trainable = False  # We don't want to train the discriminator when training the generator
gan = build_gan(generator, discriminator)  # The GAN model that combines the generator and discriminator

# Now you can proceed with the training loop using the instantiated models
for epoch in range(epochs):
    # Train Discriminator
    idx = np.random.randint(0, df_with_crop.shape[0], batch_size)
    real_data = df_with_crop.iloc[idx].values  # Get the real data
    noise = np.random.normal(0, 1, (batch_size, z_dim))  # Generate random noise for the generator
    fake_data = generator.predict(noise)  # Generate fake data using the generator

    # Label smoothing (real = 0.9 to help stabilize training)
    real_labels = np.ones((batch_size, 1)) * 0.9
    fake_labels = np.zeros((batch_size, 1))

    # Train the discriminator
    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator
    noise = np.random.normal(0, 1, (batch_size, z_dim))  # Generate noise for generator input
    valid_labels = np.ones((batch_size, 1))  # We want the generator to fool the discriminator
    g_loss = gan.train_on_batch(noise, valid_labels)  # Train the GAN by updating the generator

    if epoch % 1 == 0:  # Log the loss for every epoch
        print(f"{epoch+1} [D loss: {d_loss[0]}] [G loss: {g_loss}]")


NameError: name 'Dense' is not defined

In [18]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization

# Function to build the generator model
def build_generator(z_dim):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=z_dim))  # Input is the random noise (latent vector)
    model.add(Dense(256, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(12, activation='tanh'))  # Output layer: 12 features for the generated data
    return model

# Function to build the discriminator model
def build_discriminator():
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=12))  # Expect 12 features as input
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification (real or fake)
    return model

# Function to build the GAN model
def build_gan(generator, discriminator):
    model = Sequential()
    model.add(generator)
    model.add(discriminator)
    return model


In [19]:
# Instantiate the models
discriminator = build_discriminator()
generator = build_generator(z_dim=100)  # Define your z_dim (latent vector size)

# Compile the discriminator model
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create the GAN model by stacking the generator and discriminator
discriminator.trainable = False  # Freeze the discriminator during generator training
gan = build_gan(generator, discriminator)

# Compile the GAN model
gan.compile(loss='binary_crossentropy', optimizer='adam')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

# Step 1: Load your data
# Assuming df is your dataframe containing both numeric and non-numeric (categorical) columns
# Example structure: df has 'Crop', 'SoilType', 'Month', etc.

# Example of one possible structure
# df = pd.DataFrame({
#     'Crop': ['rice', 'sugarcane', 'banana', 'rice', 'sugarcane'],
#     'SoilType': ['red loamy', 'black cotton', 'red loamy', 'black cotton', 'red loamy'],
#     'Kc_stage1': [0.8, 1.0, 0.7, 0.8, 1.0],
#     'Kc_stage2': [1.1, 1.2, 1.0, 1.1, 1.2],
#     'Root_depth_stage1': [50, 60, 45, 50, 60],
#     'Root_depth_stage2': [75, 85, 70, 75, 85],
#     'Critical_depletion': [30, 40, 35, 30, 40]
# })

# Step 2: One-hot encode the categorical columns
encoder = OneHotEncoder(sparse=False)
categorical_columns = ['Crop', 'SoilType']  # Define your categorical columns

# Perform one-hot encoding and get a DataFrame
encoded_categories = encoder.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Step 3: Combine one-hot encoded data with numeric columns
df_numeric = df.drop(columns=categorical_columns)  # Drop categorical columns from original df
df_combined = pd.concat([df_numeric, encoded_df], axis=1)  # Combine numeric and encoded categorical data

# Step 4: Ensure all data is in float32 for TensorFlow compatibility
df_combined = df_combined.astype(np.float32)

# Step 5: Define the model architecture (same as before but adjusted for this new data)
# Assuming the shape of the combined data is (batch_size, 12)

# Build the Discriminator
def build_discriminator(input_shape=(12,)):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(1024, activation='relu', input_shape=input_shape))
    model.add(tf.keras.layers.Dense(512, activation='relu'))
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Output layer (real or fake)
    return model

# Build the Generator
def build_generator(z_dim=100):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(256, activation='relu', input_dim=z_dim))
    model.add(tf.keras.layers.Dense(512, activation='relu'))
    model.add(tf.keras.layers.Dense(1024, activation='relu'))
    model.add(tf.keras.layers.Dense(12, activation='tanh'))  # Output layer matching the input shape (12 features)
    return model

# Build the GAN
def build_gan(generator, discriminator):
    discriminator.trainable = False  # Freeze discriminator when training GAN
    model = tf.keras.Sequential()
    model.add(generator)
    model.add(discriminator)
    return model

# Instantiate models
discriminator = build_discriminator(input_shape=(df_combined.shape[1],))
generator = build_generator(z_dim=100)
gan = build_gan(generator, discriminator)

# Compile the models
discriminator.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002), metrics=['accuracy'])
gan.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002))

# Step 6: Training Loop
epochs = 11  # Number of epochs
batch_size = 32
z_dim = 100  # Latent vector size

for epoch in range(epochs):
    # Train the Discriminator
    idx = np.random.randint(0, df_combined.shape[0], batch_size)
    real_data = df_combined.iloc[idx].values  # Get a batch of real data

    # Generate fake data
    noise = np.random.normal(0, 1, (batch_size, z_dim))  # Generate random noise
    fake_data = generator.predict(noise)  # Generate fake data using the generator

    # Labels: real = 1, fake = 0
    real_labels = np.ones((batch_size, 1)) * 0.9  # Slight label smoothing for stability
    fake_labels = np.zeros((batch_size, 1))

    # Train the discriminator on real and fake data
    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # Average the losses

    # Train the Generator
    noise = np.random.normal(0, 1, (batch_size, z_dim))
    valid_labels = np.ones((batch_size, 1))  # Generator wants to fool the discriminator
    g_loss = gan.train_on_batch(noise, valid_labels)

    # Print losses for every epoch
    if epoch % 1 == 0:
        print(f"{epoch+1}/{epochs} [D loss: {d_loss[0]}] [G loss: {g_loss}]")



TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'

In [23]:
from sklearn.preprocessing import OneHotEncoder

# Step 2: One-hot encode the categorical columns
encoder = OneHotEncoder(sparse_output=False)  # Updated argument name
categorical_columns = ['Crop', 'SoilType']  # Define your categorical columns

# Perform one-hot encoding and get a DataFrame
encoded_categories = encoder.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Step 3: Combine one-hot encoded data with numeric columns
df_numeric = df.drop(columns=categorical_columns)  # Drop categorical columns from original df
df_combined = pd.concat([df_numeric, encoded_df], axis=1)  # Combine numeric and encoded categorical data

# Step 4: Ensure all data is in float32 for TensorFlow compatibility
df_combined = df_combined.astype(np.float32)

# Proceed with the rest of your code...


KeyError: "['SoilType'] not in index"

In [25]:
import pandas as pd

# Load your DataFrame (replace this with your actual df loading code)
# df = pd.read_csv('your_data.csv')

# Check if the column names are correct
print(df.columns)  # This will show all the column names in df

# For example, if 'SoilType' had a typo, fix it by using the correct name
categorical_columns = ['Crop', 'SoilType']  # Ensure these are valid columns


Index(['Crop', 'Kc_Germination', 'Kc_Initial', 'Kc_Mid_Season',
       'Kc_Late_Season', 'Root_Depth_Germination', 'Root_Depth_Initial',
       'Root_Depth_Mid_Season', 'Root_Depth_Late_Season',
       'Critical_Depletion_Germination', 'Critical_Depletion_Initial',
       'Critical_Depletion_Mid_Season', 'Critical_Depletion_Late_Season',
       'Growing_Season_Start', 'Growing_Season_End'],
      dtype='object')


In [26]:
from sklearn.preprocessing import OneHotEncoder

# Step 2: One-hot encode the categorical columns
encoder = OneHotEncoder(sparse_output=False)  # Updated argument name
categorical_columns = ['Crop', 'SoilType']  # Define your categorical columns

# Perform one-hot encoding and get a DataFrame
encoded_categories = encoder.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Step 3: Combine one-hot encoded data with numeric columns
df_numeric = df.drop(columns=categorical_columns)  # Drop categorical columns from original df
df_combined = pd.concat([df_numeric, encoded_df], axis=1)  # Combine numeric and encoded categorical data

# Step 4: Ensure all data is in float32 for TensorFlow compatibility
df_combined = df_combined.astype(np.float32)

# Proceed with the rest of your code...


KeyError: "['SoilType'] not in index"

In [27]:
df.columns = df.columns.str.strip()  # Strip any leading/trailing spaces
print(df.columns)  # Check if 'SoilType' is now listed correctly


Index(['Crop', 'Kc_Germination', 'Kc_Initial', 'Kc_Mid_Season',
       'Kc_Late_Season', 'Root_Depth_Germination', 'Root_Depth_Initial',
       'Root_Depth_Mid_Season', 'Root_Depth_Late_Season',
       'Critical_Depletion_Germination', 'Critical_Depletion_Initial',
       'Critical_Depletion_Mid_Season', 'Critical_Depletion_Late_Season',
       'Growing_Season_Start', 'Growing_Season_End'],
      dtype='object')


In [28]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Check the columns of the DataFrame
print("Columns in the DataFrame:", df.columns)  # This will help identify the issue

# Strip any extra spaces from the column names to avoid hidden issues
df.columns = df.columns.str.strip()

# Check again if 'SoilType' and 'Crop' exist in the DataFrame
print("Columns after stripping spaces:", df.columns)

# Ensure the correct columns are selected for one-hot encoding
categorical_columns = ['Crop', 'SoilType']  # Ensure these are valid columns

# If 'SoilType' is not found, check if there is a typo or other column name
missing_columns = [col for col in categorical_columns if col not in df.columns]
if missing_columns:
    print(f"Missing columns: {missing_columns}")
else:
    # Perform one-hot encoding if columns exist
    encoder = OneHotEncoder(sparse_output=False)
    encoded_categories = encoder.fit_transform(df[categorical_columns])

    # Create a DataFrame from the one-hot encoded categories
    encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

    # Step 3: Combine one-hot encoded data with numeric columns
    df_numeric = df.drop(columns=categorical_columns)  # Drop categorical columns from original df
    df_combined = pd.concat([df_numeric, encoded_df], axis=1)  # Combine numeric and encoded categorical data

    # Step 4: Ensure all data is in float32 for TensorFlow compatibility
    df_combined = df_combined.astype(np.float32)

    print("Data preprocessing complete.")


Columns in the DataFrame: Index(['Crop', 'Kc_Germination', 'Kc_Initial', 'Kc_Mid_Season',
       'Kc_Late_Season', 'Root_Depth_Germination', 'Root_Depth_Initial',
       'Root_Depth_Mid_Season', 'Root_Depth_Late_Season',
       'Critical_Depletion_Germination', 'Critical_Depletion_Initial',
       'Critical_Depletion_Mid_Season', 'Critical_Depletion_Late_Season',
       'Growing_Season_Start', 'Growing_Season_End'],
      dtype='object')
Columns after stripping spaces: Index(['Crop', 'Kc_Germination', 'Kc_Initial', 'Kc_Mid_Season',
       'Kc_Late_Season', 'Root_Depth_Germination', 'Root_Depth_Initial',
       'Root_Depth_Mid_Season', 'Root_Depth_Late_Season',
       'Critical_Depletion_Germination', 'Critical_Depletion_Initial',
       'Critical_Depletion_Mid_Season', 'Critical_Depletion_Late_Season',
       'Growing_Season_Start', 'Growing_Season_End'],
      dtype='object')
Missing columns: ['SoilType']


In [29]:
# Create temporary columns if missing (for testing purposes)
if 'SoilType' not in df.columns:
    df['SoilType'] = 'Red Loamy'  # Example, replace with valid data

if 'Crop' not in df.columns:
    df['Crop'] = 'Rice'  # Example, replace with valid data


In [30]:
from sklearn.preprocessing import OneHotEncoder

# Step 2: One-hot encode the categorical columns
encoder = OneHotEncoder(sparse_output=False)  # Updated argument name
categorical_columns = ['Crop', 'SoilType']  # Define your categorical columns

# Perform one-hot encoding and get a DataFrame
encoded_categories = encoder.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Step 3: Combine one-hot encoded data with numeric columns
df_numeric = df.drop(columns=categorical_columns)  # Drop categorical columns from original df
df_combined = pd.concat([df_numeric, encoded_df], axis=1)  # Combine numeric and encoded categorical data

# Step 4: Ensure all data is in float32 for TensorFlow compatibility
df_combined = df_combined.astype(np.float32)

# Proceed with the rest of your code...


ValueError: could not convert string to float: '2023-06-01'

In [31]:
# Convert date column to datetime type
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# If you want to convert the date into a numerical value (e.g., days since a reference date)
df['Date'] = (df['Date'] - pd.to_datetime('1970-01-01')).dt.total_seconds()

# Now you can drop the 'Date' column if you don't need it or keep it for modeling.


KeyError: 'Date'

In [32]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Check column names and data types
print(df.columns)
print(df.dtypes)

# Example of handling date columns
# If there is a column containing dates (e.g., 'PlantingDate', 'HarvestDate', etc.)
# Replace 'PlantingDate' with your actual column name if it's different
date_columns = ['PlantingDate']  # List any date columns in your dataset

for date_col in date_columns:
    if date_col in df.columns:
        # Convert the column to datetime
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

        # Convert the date to a numerical value (number of seconds since Unix epoch)
        df[date_col] = (df[date_col] - pd.to_datetime('1970-01-01')).dt.total_seconds()

# Perform one-hot encoding on categorical columns
encoder = OneHotEncoder(sparse_output=False)
categorical_columns = ['Crop', 'SoilType']  # Adjust based on your actual categorical columns

# Perform one-hot encoding and get a DataFrame
encoded_categories = encoder.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Combine one-hot encoded data with numeric columns
df_numeric = df.drop(columns=categorical_columns)  # Drop categorical columns
df_combined = pd.concat([df_numeric, encoded_df], axis=1)  # Combine numeric and encoded data

# Convert numeric columns to float32
df_combined = df_combined.apply(pd.to_numeric, errors='ignore')  # Convert all numeric columns
df_combined = df_combined.astype(np.float32, errors='ignore')  # Ensure the correct type for numeric data

# Now df_combined is ready for model training or further analysis


Index(['Crop', 'Kc_Germination', 'Kc_Initial', 'Kc_Mid_Season',
       'Kc_Late_Season', 'Root_Depth_Germination', 'Root_Depth_Initial',
       'Root_Depth_Mid_Season', 'Root_Depth_Late_Season',
       'Critical_Depletion_Germination', 'Critical_Depletion_Initial',
       'Critical_Depletion_Mid_Season', 'Critical_Depletion_Late_Season',
       'Growing_Season_Start', 'Growing_Season_End', 'SoilType'],
      dtype='object')
Crop                               object
Kc_Germination                    float64
Kc_Initial                        float64
Kc_Mid_Season                     float64
Kc_Late_Season                    float64
Root_Depth_Germination            float64
Root_Depth_Initial                float64
Root_Depth_Mid_Season             float64
Root_Depth_Late_Season            float64
Critical_Depletion_Germination    float64
Critical_Depletion_Initial        float64
Critical_Depletion_Mid_Season     float64
Critical_Depletion_Late_Season    float64
Growing_Season_Start      

  df_combined = df_combined.apply(pd.to_numeric, errors='ignore')  # Convert all numeric columns


In [33]:
# Define the Discriminator model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, Dropout

def build_discriminator(input_dim=12):
    model = Sequential()
    model.add(Dense(1024, activation=LeakyReLU(alpha=0.2), input_dim=input_dim))
    model.add(Dense(512, activation=LeakyReLU(alpha=0.2)))
    model.add(Dense(256, activation=LeakyReLU(alpha=0.2)))
    model.add(Dense(1, activation='sigmoid'))  # Sigmoid for binary classification (real/fake)
    return model

discriminator = build_discriminator(input_dim=12)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [34]:
# Define the Generator model
from tensorflow.keras.layers import Dense, Reshape, BatchNormalization

def build_generator(z_dim=100, output_dim=12):
    model = Sequential()
    model.add(Dense(256, activation=LeakyReLU(alpha=0.2), input_dim=z_dim))
    model.add(Dense(512, activation=LeakyReLU(alpha=0.2)))
    model.add(Dense(1024, activation=LeakyReLU(alpha=0.2)))
    model.add(Dense(output_dim, activation='tanh'))  # Output layer for 12 features
    return model

generator = build_generator(z_dim=100, output_dim=12)


In [35]:
# Define the GAN model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

def build_gan(generator, discriminator):
    discriminator.trainable = False  # Freeze the discriminator during GAN training
    z = Input(shape=(100,))  # Latent space input
    generated_data = generator(z)
    validity = discriminator(generated_data)  # Discriminator's output
    model = Model(z, validity)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

gan = build_gan(generator, discriminator)


In [37]:
# Define the Discriminator model with the correct input dimension
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU

def build_discriminator(input_dim=12):
    model = Sequential()
    model.add(Dense(1024, activation=LeakyReLU(alpha=0.2), input_dim=input_dim))
    model.add(Dense(512, activation=LeakyReLU(alpha=0.2)))
    model.add(Dense(256, activation=LeakyReLU(alpha=0.2)))
    model.add(Dense(1, activation='sigmoid'))  # Sigmoid for binary classification (real/fake)
    return model

discriminator = build_discriminator(input_dim=12)  # Ensure it's 12 features
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [39]:
# If you have 12 numeric features and additional one-hot encoded columns, count the total number of columns
input_dim = len(df_combined.columns)  # Update this based on your final number of features

# Update the discriminator with the correct input dimension
discriminator = build_discriminator(input_dim=input_dim)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [40]:
# Training loop
for epoch in range(epochs):
    # Select a random batch of real data
    idx = np.random.randint(0, real_data.shape[0], half_batch)
    real_batch = real_data[idx]

    # Check if the shape of real_batch is correct
    print(f"Shape of real_batch: {real_batch.shape}")  # This should be (half_batch, 12)

    # Generate a batch of fake data using the generator
    noise = np.random.normal(0, 1, (half_batch, z_dim))  # Latent space
    fake_batch = generator.predict(noise)

    # Check if the shape of fake_batch is correct
    print(f"Shape of fake_batch: {fake_batch.shape}")  # This should be (half_batch, 12)

    # Train the discriminator on real and fake data
    d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_batch, fake_labels)

    # Average discriminator loss
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator
    noise = np.random.normal(0, 1, (batch_size, z_dim))  # Latent space
    g_loss = gan.train_on_batch(noise, real_labels)  # Use real labels to fool the discriminator

    # Print progress
    print(f"{epoch+1}/{epochs} [D loss: {d_loss[0]:.4f}, D accuracy: {d_loss[1]*100:.2f}%] [G loss: {g_loss:.4f}]")


Shape of real_batch: (32, 20)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Shape of fake_batch: (32, 12)


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [41]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np

# Example dataframe (replace with your own dataframe)
df = pd.DataFrame({
    'Crop': ['Rice', 'Banana', 'Sugarcane', 'Rice'],
    'SoilType': ['Red Loamy', 'Black Cotton', 'Red Loamy', 'Black Cotton'],
    'Date': ['2023-06-01', '2023-06-02', '2023-06-03', '2023-06-04'],
    'Irrigation': [200, 250, 180, 210],
    'Rainfall': [120, 110, 130, 140]
})

# Define the categorical columns
categorical_columns = ['Crop', 'SoilType']

# Apply OneHotEncoding
encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(df[categorical_columns])

# Convert the one-hot encoded columns into a dataframe
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Drop original categorical columns
df_combined = pd.concat([df.drop(columns=categorical_columns), encoded_df], axis=1)

# Check the final dataframe
print(df_combined.head())


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'

In [42]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Example dataframe (replace with your own dataframe)
df = pd.DataFrame({
    'Crop': ['Rice', 'Banana', 'Sugarcane', 'Rice'],
    'SoilType': ['Red Loamy', 'Black Cotton', 'Red Loamy', 'Black Cotton'],
    'Date': ['2023-06-01', '2023-06-02', '2023-06-03', '2023-06-04'],
    'Irrigation': [200, 250, 180, 210],
    'Rainfall': [120, 110, 130, 140]
})

# Define the categorical columns
categorical_columns = ['Crop', 'SoilType']

# Apply OneHotEncoding (corrected)
encoder = OneHotEncoder(sparse_output=False)  # sparse=False is replaced by sparse_output=False
encoded_categories = encoder.fit_transform(df[categorical_columns])

# Convert the one-hot encoded columns into a dataframe
encoded_df = pd.DataFrame(encoded_categories, columns=encoder.get_feature_names_out(categorical_columns))

# Drop original categorical columns
df_combined = pd.concat([df.drop(columns=categorical_columns), encoded_df], axis=1)

# Check the final dataframe
print(df_combined.head())


         Date  Irrigation  Rainfall  Crop_Banana  Crop_Rice  Crop_Sugarcane  \
0  2023-06-01         200       120          0.0        1.0             0.0   
1  2023-06-02         250       110          1.0        0.0             0.0   
2  2023-06-03         180       130          0.0        0.0             1.0   
3  2023-06-04         210       140          0.0        1.0             0.0   

   SoilType_Black Cotton  SoilType_Red Loamy  
0                    0.0                 1.0  
1                    1.0                 0.0  
2                    0.0                 1.0  
3                    1.0                 0.0  


In [43]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam


In [44]:
def build_discriminator(input_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=input_dim, activation=LeakyReLU(0.2)))
    model.add(Dense(64, activation=LeakyReLU(0.2)))
    model.add(Dense(32, activation=LeakyReLU(0.2)))
    model.add(Dense(1, activation='sigmoid'))  # Output layer to classify real or fake
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5), metrics=['accuracy'])
    return model

def build_generator(latent_dim, output_dim):
    model = Sequential()
    model.add(Dense(64, input_dim=latent_dim, activation=LeakyReLU(0.2)))
    model.add(Dense(128, activation=LeakyReLU(0.2)))
    model.add(Dense(256, activation=LeakyReLU(0.2)))
    model.add(Dense(output_dim, activation='tanh'))  # Output layer matches the data dimension
    return model


In [46]:
def build_gan(generator, discriminator):
    # Freeze the discriminator during GAN training
    discriminator.trainable = False
    model = Sequential()
    model.add(generator)
    model.add(discriminator)
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))
    return model


In [47]:
def train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim):
    half_batch = batch_size // 2

    for epoch in range(epochs):
        # Train the discriminator

        # Select a random half batch of real data
        real_batch = df_combined.sample(n=half_batch)
        real_labels = np.ones((half_batch, 1))  # Labels for real data

        # Generate fake data
        noise = np.random.normal(0, 1, (half_batch, latent_dim))  # Latent space noise
        fake_batch = generator.predict(noise)
        fake_labels = np.zeros((half_batch, 1))  # Labels for fake data

        # Train the discriminator on real and fake data
        d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_batch, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))  # Latent space noise
        valid_labels = np.ones((batch_size, 1))  # Labels for generator to fool the discriminator
        g_loss = gan.train_on_batch(noise, valid_labels)

        # Print the progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch} | D Loss: {d_loss[0]} | G Loss: {g_loss}")


In [49]:
# Set parameters
latent_dim = 100  # Dimensionality of the random noise
output_dim = df_combined.shape[1]  # Number of features in the data

# Build and compile models
discriminator = build_discriminator(output_dim)
generator = build_generator(latent_dim, output_dim)
gan = build_gan(generator, discriminator)

# Train the GAN for 11 epochs
epochs = 11
batch_size = 32
train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim)


ValueError: Cannot take a larger sample than population when 'replace=False'

In [52]:
import numpy as np

# Define the training loop for GAN
def train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim):
    half_batch = batch_size // 2

    for epoch in range(epochs):
        # Ensure batch_size is not greater than dataset size
        if batch_size > df_combined.shape[0]:
            batch_size = df_combined.shape[0]

        # Sample a random half batch of real data
        real_batch = df_combined.sample(n=half_batch, replace=True)  # Use replace=True to avoid sampling errors
        real_labels = np.ones((half_batch, 1))  # Labels for real data (1)

        # Generate fake data
        noise = np.random.normal(0, 1, (half_batch, latent_dim))  # Latent space noise for generating fake data
        fake_batch = generator.predict(noise)
        fake_labels = np.zeros((half_batch, 1))  # Labels for fake data (0)

        # Train the discriminator on real and fake data
        d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_batch, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))  # Latent space noise for the generator
        valid_labels = np.ones((batch_size, 1))  # Labels for the generator to fool the discriminator
        g_loss = gan.train_on_batch(noise, valid_labels)

        # Print the progress every epoch
        if epoch % 1 == 0:
            print(f"Epoch {epoch} | D Loss: {d_loss[0]} | G Loss: {g_loss}")

# Set parameters for the GAN model
latent_dim = 100  # Dimensionality of the random noise
output_dim = df_combined.shape[1]  # Number of features in the data

# Build and compile the models
discriminator = build_discriminator(output_dim)
generator = build_generator(latent_dim, output_dim)
gan = build_gan(generator, discriminator)

# Train the GAN for 11 epochs
epochs = 11
batch_size = 32
train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).

In [53]:
import numpy as np

# Define the training loop for GAN
def train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim):
    half_batch = batch_size // 2

    for epoch in range(epochs):
        # Ensure batch_size is not greater than dataset size
        if batch_size > df_combined.shape[0]:
            batch_size = df_combined.shape[0]

        # Sample a random half batch of real data
        real_batch = df_combined.sample(n=half_batch, replace=True)  # Use replace=True to avoid sampling errors
        real_batch = real_batch.astype(np.float32)  # Convert to float32 for compatibility
        real_labels = np.ones((half_batch, 1), dtype=np.float32)  # Labels for real data (1)

        # Generate fake data
        noise = np.random.normal(0, 1, (half_batch, latent_dim))  # Latent space noise for generating fake data
        fake_batch = generator.predict(noise)
        fake_batch = fake_batch.astype(np.float32)  # Convert to float32 for compatibility
        fake_labels = np.zeros((half_batch, 1), dtype=np.float32)  # Labels for fake data (0)

        # Train the discriminator on real and fake data
        d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_batch, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))  # Latent space noise for the generator
        valid_labels = np.ones((batch_size, 1), dtype=np.float32)  # Labels for the generator to fool the discriminator
        g_loss = gan.train_on_batch(noise, valid_labels)

        # Print the progress every epoch
        if epoch % 1 == 0:
            print(f"Epoch {epoch} | D Loss: {d_loss[0]} | G Loss: {g_loss}")

# Set parameters for the GAN model
latent_dim = 100  # Dimensionality of the random noise
output_dim = df_combined.shape[1]  # Number of features in the data

# Build and compile the models
discriminator = build_discriminator(output_dim)
generator = build_generator(latent_dim, output_dim)
gan = build_gan(generator, discriminator)

# Train the GAN for 11 epochs
epochs = 11
batch_size = 32
train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim)


ValueError: could not convert string to float: '2023-06-01'

In [54]:
import numpy as np

# Preprocess the data
# Drop the 'Date' column (if not needed)
df_combined = df_combined.drop(columns=['Date'], axis=1)

# If you want to keep 'Date', convert it to numeric
# df_combined['Date'] = pd.to_datetime(df_combined['Date'])
# reference_date = df_combined['Date'].min()
# df_combined['Date'] = (df_combined['Date'] - reference_date).dt.days

# Ensure all data is numeric and of type float32
df_combined = df_combined.astype(np.float32)

# Define the training loop for GAN
def train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim):
    half_batch = batch_size // 2

    for epoch in range(epochs):
        # Ensure batch_size is not greater than dataset size
        if batch_size > df_combined.shape[0]:
            batch_size = df_combined.shape[0]

        # Sample a random half batch of real data
        real_batch = df_combined.sample(n=half_batch, replace=True)  # Use replace=True to avoid sampling errors
        real_labels = np.ones((half_batch, 1), dtype=np.float32)  # Labels for real data (1)

        # Generate fake data
        noise = np.random.normal(0, 1, (half_batch, latent_dim))  # Latent space noise for generating fake data
        fake_batch = generator.predict(noise)
        fake_batch = fake_batch.astype(np.float32)  # Convert to float32 for compatibility
        fake_labels = np.zeros((half_batch, 1), dtype=np.float32)  # Labels for fake data (0)

        # Train the discriminator on real and fake data
        d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_batch, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))  # Latent space noise for the generator
        valid_labels = np.ones((batch_size, 1), dtype=np.float32)  # Labels for the generator to fool the discriminator
        g_loss = gan.train_on_batch(noise, valid_labels)

        # Print the progress every epoch
        if epoch % 1 == 0:
            print(f"Epoch {epoch} | D Loss: {d_loss[0]} | G Loss: {g_loss}")

# Set parameters for the GAN model
latent_dim = 100  # Dimensionality of the random noise
output_dim = df_combined.shape[1]  # Number of features in the data

# Build and compile the models
discriminator = build_discriminator(output_dim)
generator = build_generator(latent_dim, output_dim)
gan = build_gan(generator, discriminator)

# Train the GAN for 11 epochs
epochs = 11
batch_size = 32
train_gan(epochs, batch_size, df_combined, generator, discriminator, gan, latent_dim)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step




Epoch 0 | D Loss: 3.9848082065582275 | G Loss: 0.7045594453811646
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Epoch 1 | D Loss: 3.350250720977783 | G Loss: 0.6986794471740723
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Epoch 2 | D Loss: 3.112456798553467 | G Loss: 0.6990153789520264
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Epoch 3 | D Loss: 3.0178492069244385 | G Loss: 0.6954140663146973
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Epoch 4 | D Loss: 2.9799389839172363 | G Loss: 0.6956451535224915
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Epoch 5 | D Loss: 2.9362335205078125 | G Loss: 0.6947593688964844
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Epoch 6 | D Loss: 2.9141032695770264 | G Loss: 0.6936522126197815
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Epoch 7 | D Loss: 2.8954367637634277

In [55]:
# Generate new data samples using the trained generator
latent_space_samples = np.random.normal(0, 1, (10, latent_dim))  # Generate 10 random noise samples
generated_samples = generator.predict(latent_space_samples)

print("Generated samples:")
print(generated_samples)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
Generated samples:
[[ 0.15560116  0.08922563  0.00285088  0.17718494  0.24709041 -0.04155008
   0.23591311]
 [-0.44223368 -0.6806918  -0.09609998 -0.67217153  0.6783975   0.09297372
  -0.10788077]
 [ 0.1272744   0.2926574   0.41785666 -0.05850643  0.45023146  0.4533676
  -0.30686688]
 [ 0.08855585 -0.30246976  0.09566128  0.21743089  0.29993224  0.27070093
  -0.05140532]
 [-0.25687575 -0.38273355 -0.2941626   0.07576579  0.5934957   0.02866942
  -0.38416153]
 [-0.2443263  -0.10615853 -0.04669548  0.04763838  0.34757546 -0.04140759
   0.02360817]
 [-0.5755819  -0.70635176  0.17860775 -0.6150898   0.59605885 -0.1328187
  -0.06354292]
 [-0.30063245  0.01218119 -0.28941408  0.17875375  0.35048565  0.24537799
  -0.43220153]
 [ 0.03023698 -0.32376248 -0.11038805 -0.03430663  0.29182947  0.17920455
  -0.03201198]
 [-0.37833878 -0.4326068  -0.2658335   0.21874559  0.3943074   0.45660228
  -0.7095246 ]]


In [56]:
import numpy as np
import pandas as pd

# Set the number of samples you want to generate
num_samples = 1000

# Generate random noise (latent space samples)
latent_space_samples = np.random.normal(0, 1, (num_samples, latent_dim))

# Generate data using the trained generator
generated_data = generator.predict(latent_space_samples)

# Convert the generated data into a DataFrame
generated_df = pd.DataFrame(generated_data, columns=df_combined.columns)

# Save the generated data to a CSV file
generated_df.to_csv('generated_data.csv', index=False)

print("Generated data saved to 'generated_data.csv'")


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Generated data saved to 'generated_data.csv'


In [57]:
import numpy as np
import pandas as pd

# Set the number of rows you want to generate
num_samples = 1000

# Generate random noise (latent space samples)
latent_space_samples = np.random.normal(0, 1, (num_samples, latent_dim))

# Generate data using the trained generator
generated_data = generator.predict(latent_space_samples)

# Convert the generated data into a DataFrame
generated_df = pd.DataFrame(generated_data, columns=df_combined.columns)

# Save the generated data to a CSV file in your Colab environment
generated_df.to_csv('/content/generated_data.csv', index=False)

# Confirm the file has been created
print("Generated data saved to 'generated_data.csv' at '/content/generated_data.csv'")


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Generated data saved to 'generated_data.csv' at '/content/generated_data.csv'


In [58]:
from google.colab import files
files.download('/content/generated_data.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>