<a href="https://colab.research.google.com/github/Waleed850/Coding-Code/blob/main/Crop_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Data **Preprocessing**

In [None]:
! git clone https://github.com/Waleed850/Coding-Code.git

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
import numpy as np
from osgeo import gdal, gdal_array
import pandas as pd
import operator
import gc
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [15]:
# Load ROI data
roi_ds = gdal.Open('/content/drive/MyDrive/GDToT/T34JEP_20170101T082332_TCI.jp2', gdal.GA_ReadOnly)
roi = roi_ds.GetRasterBand(1).ReadAsArray()

In [16]:
# Identify top classes
classes = np.unique(roi)
class_counts = {c: (roi == c).sum() for c in classes}
sorted_classes = sorted(class_counts.items(), key=operator.itemgetter(1), reverse=True)
top_classes = [c for c, count in sorted_classes if c != 255][:5]  # Exclude class 255 and take top 5


In [18]:
# Print class info
print("Top 5 classes and their pixel counts:")
for c in top_classes:
    print(f"Class {c} contains {class_counts[c]} pixels")


Top 5 classes and their pixel counts:
Class 162 contains 3016540 pixels
Class 163 contains 3014673 pixels
Class 161 contains 3000717 pixels
Class 164 contains 2998157 pixels
Class 160 contains 2971168 pixels


In [19]:
# Get classification labels
labels = np.unique(roi[roi > 0])

In [20]:
# Images to process
images = [
    '/content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B01.jp2',
    '/content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B02.jp2',
    '/content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B03.jp2',
    '/content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B04.jp2',
    '/content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B05.jp2'
]

In [21]:
# Dataframe to hold all data
final = pd.DataFrame()

In [22]:

# Process each class
for c in top_classes:
    print(f"Processing class {c}")
    class_data = []

    for img_path in images:
        print(f"Reading image: {img_path}")
        train_ds = gdal.Open(img_path, gdal.GA_ReadOnly)
        if train_ds is None:
            raise FileNotFoundError(f"Image file {img_path} not found or unable to open.")

        img_b1 = np.zeros((train_ds.RasterYSize, train_ds.RasterXSize, train_ds.RasterCount),
                          gdal_array.GDALTypeCodeToNumericTypeCode(train_ds.GetRasterBand(1).DataType))

        for b in range(img_b1.shape[2]):
            img_b1[:, :, b] = train_ds.GetRasterBand(b + 1).ReadAsArray()

        # Resize ROI to match image dimensions
        roi_resized = np.resize(roi, (img_b1.shape[0], img_b1.shape[1]))

        mask = (roi_resized == c)
        if mask.shape != img_b1[:, :, 0].shape:
            raise ValueError(f"Mask shape {mask.shape} does not match image shape {img_b1[:, :, 0].shape}")

        for b in range(img_b1.shape[2]):
            # Collect data for the class
            class_data.append(img_b1[mask, b].flatten())

    # Ensure all arrays in class_data have the same length
    min_length = min(map(len, class_data))
    class_data = [arr[:min_length] for arr in class_data]

    # Convert to DataFrame
    class_df = pd.DataFrame(np.column_stack(class_data))
    class_df['class'] = c

    # Append to final dataset
    final = pd.concat([final, class_df], axis=0)

    gc.collect()

# Reset index
final.reset_index(drop=True, inplace=True)

# Save to CSV
final.to_csv("Dataset123.csv", index=False)

print("Data preprocessing completed and saved to Dataset123.csv")


Processing class 162
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B01.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B02.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B03.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B04.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B05.jp2
Processing class 163
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B01.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B02.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B03.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B04.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B05.jp2
Processing class 161
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B01.jp2
Reading image: /content/drive/MyDrive/GDToT/T34JEP_20170101T082332_B02.jp2
Reading image: /content/drive/MyDrive

In [23]:
# Reset index and shuffle
final.reset_index(drop=True, inplace=True)
final = final.sample(frac=1).reset_index(drop=True)  # Shuffle the dataframe

In [24]:
# Standardize features
scaler = StandardScaler()
features = final.drop(columns=['class'])
features_scaled = scaler.fit_transform(features)

In [26]:
# Apply PCA to reduce to a reasonable number of components
n_components = min(5, features_scaled.shape[0], features_scaled.shape[1])  # Ensure n_components is valid
pca = PCA(n_components=n_components)
features_pca = pca.fit_transform(features_scaled)

In [27]:

# Combine PCA features with labels
features_pca_df = pd.DataFrame(features_pca)
features_pca_df['class'] = final['class'].values

In [28]:
# Save to CSV
features_pca_df.to_csv("Dataset123.csv", index=False)
print("Final dataset shape:", features_pca_df.shape)
print("Saved to Dataset123.csv")

Final dataset shape: (355524, 6)
Saved to Dataset123.csv


In [29]:
final

Unnamed: 0,0,1,2,3,4,class
0,1355,1285,1522,2287,2373,164
1,1469,1192,1347,2276,2453,164
2,1235,1222,1389,2262,2680,163
3,1288,1230,1370,2234,2504,161
4,1375,1246,1402,2222,2593,160
...,...,...,...,...,...,...
355519,1301,1113,1221,2243,2591,161
355520,1325,1166,1304,2226,2715,160
355521,1400,1240,1379,2274,2413,163
355522,1433,1297,1461,2256,2619,162


**Crop Classification**

In [30]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras import regularizers

In [32]:
# Load the dataset
final = pd.read_csv("/content/Dataset123.csv")
final.columns = ['col_' + str(i) for i in range(6)]

In [33]:
# Separate features and labels
data = final.iloc[:, :-1]
labels = final['col_5']

In [34]:
# Encode class values as integers
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(labels)
dummy_y = to_categorical(encoded_Y, num_classes=5)

In [35]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(data, dummy_y, test_size=0.33, random_state=42, shuffle=True)

In [36]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [37]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras import regularizers

In [38]:
# Build the model
model = Sequential()
model.add(Dense(200, input_shape=(X_train.shape[1],), activation='relu',
                kernel_regularizer=regularizers.l2(1e-5),
                kernel_initializer='glorot_normal', bias_initializer='zeros'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='softmax'))

In [39]:

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

In [40]:
final.head()

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5
0,1.964567,0.355314,-0.645622,1.221855,-0.63267,164
1,-0.84246,0.905364,0.063513,1.11446,-0.047356,164
2,-0.19845,0.220004,-1.324681,-0.543195,-0.113633,163
3,-0.429531,-0.678828,-0.01692,-0.91437,0.098077,161
4,-0.056206,-0.257927,0.736538,-1.49125,-0.050594,160


In [41]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=100, shuffle=True)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [42]:
# Evaluate the model
score = model.evaluate(X_test, y_test, batch_size=32)
print("\n%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))


accuracy: 94.32%


In [45]:
model.save('crop_classification_model.h5')


  saving_api.save_model(
