In [207]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
from keras.models import load_model
import ast
from keras.preprocessing.sequence import pad_sequences

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [208]:
# Load the data
data = pd.read_csv('/content/drive/MyDrive/SIFT/Dataframe_skripsi_sift_100m_30.csv',
                 converters={'Carbon Values': lambda x: float(x.replace(',', '.'))})

data.iloc[0]

Unnamed: 0                                                            0
image_path            /content/drive/MyDrive/Skripsi/Cropped_Image/K...
HST                                                                   0
Feature Extraction    [[8.517142295837402, 5.055912971496582], [11.4...
Descriptors           [[1.0, 0.0, 0.0, 0.0, 1.0, 24.0, 51.0, 45.0, 1...
Carbon Values                                                     28.65
Name: 0, dtype: object

In [209]:
# Check for NaN values in the 'Feature Extraction' and 'Descriptors' columns
nan_rows_fe = data[data['Feature Extraction'].isna()]
nan_rows_d = data[data['Descriptors'].isna()]

if not nan_rows_fe.empty:
    print(f'Found {len(nan_rows_fe)} rows with NaN values in the Feature Extraction column.')
    data = data.dropna(subset=['Feature Extraction'])

if not nan_rows_d.empty:
    print(f'Found {len(nan_rows_d)} rows with NaN values in the Descriptors column.')
    data = data.dropna(subset=['Descriptors'])

In [210]:
# Convert the string representation of a nested list into a nested list of floats
data['Feature Extraction'] = data['Feature Extraction'].apply(lambda x: [[float(i) for i in inner] for inner in ast.literal_eval(x)])
data['Descriptors'] = data['Descriptors'].apply(lambda x: [[float(i) for i in inner] for inner in ast.literal_eval(x)])

In [211]:
# Convert nested lists into flat lists
data['Feature Extraction'] = data['Feature Extraction'].apply(lambda x: [item for sublist in x for item in sublist])
data['Descriptors'] = data['Descriptors'].apply(lambda x: [item for sublist in x for item in sublist])

In [212]:
data['Descriptors']

0      [1.0, 0.0, 0.0, 0.0, 1.0, 24.0, 51.0, 45.0, 1....
1      [10.0, 2.0, 4.0, 110.0, 116.0, 18.0, 7.0, 6.0,...
2      [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...
3      [0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 6.0, 0.0, 0.0, ...
4      [26.0, 9.0, 35.0, 38.0, 33.0, 12.0, 50.0, 58.0...
                             ...                        
535    [2.0, 15.0, 40.0, 5.0, 11.0, 46.0, 19.0, 9.0, ...
536    [140.0, 45.0, 6.0, 13.0, 45.0, 26.0, 10.0, 29....
537    [6.0, 2.0, 0.0, 12.0, 35.0, 20.0, 13.0, 14.0, ...
538    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 7.0, ...
539    [11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 13.0, 132...
Name: Descriptors, Length: 540, dtype: object

In [213]:
print(len(data.iloc[0]['Descriptors']))

1792


In [214]:
print(data.iloc[13]['Feature Extraction'])

print(len(max(data['Feature Extraction'])))

print(len(max(data['Descriptors'])))

[2.250148296356201, 4.814320087432861, 2.6799728870391846, 20.831092834472656, 4.117025852203369, 15.536389350891113, 5.026491641998291, 26.424013137817383, 5.509462833404541, 18.37647819519043, 5.509462833404541, 18.37647819519043, 8.460746765136719, 23.84880256652832, 11.896467208862305, 18.00596809387207, 12.999123573303223, 3.2669873237609863, 14.272517204284668, 12.79320240020752, 14.272517204284668, 12.79320240020752, 14.272517204284668, 12.79320240020752, 14.530510902404785, 20.4268856048584, 14.980006217956543, 26.492067337036133, 15.615193367004395, 5.4109015464782715, 17.486188888549805, 11.834003448486328, 17.722707748413086, 17.322750091552734, 18.160329818725586, 3.305634021759033, 19.639934539794922, 25.766830444335938, 19.639934539794922, 25.766830444335938, 20.484264373779297, 5.399669647216797, 20.66260528564453, 9.732950210571289, 24.85502815246582, 16.57094383239746]
16
2304


In [215]:
# Pad the 'Feature Extraction' and 'Descriptors' lists
data['Feature Extraction'] = pad_sequences(data['Feature Extraction'], dtype='float32').tolist()
data['Descriptors'] = pad_sequences(data['Descriptors'], dtype='float32').tolist()

In [216]:
print(len(data.iloc[0]['Descriptors']))

5120


In [217]:
# New DataFrame
df = pd.DataFrame(data)

# Iterate over each row
for index, row in df.iterrows():
    # Extract 'Feature Extraction' and 'Descriptors'
    feature_extraction = row['Feature Extraction']
    descriptors = row['Descriptors']

    # Pad the lists to a fixed length (if necessary)
    # max_length = max(len(feature_extraction), len(descriptors))
    max_length_features =  len(max(data['Feature Extraction']))
    max_length_descriptors = len(max(data['Descriptors']))

    padded_feature_extraction = feature_extraction
    padded_descriptors = descriptors

    feature_extraction_cols = [f'Feature Extraction {i}' for i in range(max_length_features)]
    descriptors_cols = [f'Descriptors {i}' for i in range(max_length_descriptors)]

    new_row_data = {
        'HST': row['HST'],
        **{col: val for col, val in zip(feature_extraction_cols, padded_feature_extraction)},
        **{col: val for col, val in zip(descriptors_cols, padded_descriptors)},
        'Carbon Values': row['Carbon Values']
    }

#     # Append the new row to the new DataFrame
#     # Append the new row to the new DataFrame
    df = pd.concat([df, pd.DataFrame(new_row_data, index=[0])], ignore_index=True)
# print(max_length)

In [218]:
# import pandas as pd

# # Function to pad lists to a specified length
# def pad_list(lst, max_length):
#     return lst + [None] * (max_length - len(lst))

# # Create a new DataFrame for the transformed data
# new_rows = []

# df = pd.DataFrame(data)
# # Iterate over each row
# for index, row in df.iterrows():
#     # Extract 'Feature Extraction' and 'Descriptors'
#     feature_extraction = row['Feature Extraction']
#     descriptors = row['Descriptors']

#     # Determine the maximum lengths for padding
#     max_length_features = len(max(data['Feature Extraction'], key=len))
#     max_length_descriptors = len(max(data['Descriptors'], key=len))

#     # Pad the lists to the maximum length
#     padded_feature_extraction = pad_list(feature_extraction, max_length_features)
#     padded_descriptors = pad_list(descriptors, max_length_descriptors)

#     # Create column names for padded features and descriptors
#     feature_extraction_cols = [f'Feature Extraction {i}' for i in range(max_length_features)]
#     descriptors_cols = [f'Descriptors {i}' for i in range(max_length_descriptors)]

#     # Create a new row with padded data
#     new_row_data = {
#         'HST': row['HST'],
#         **{col: val for col, val in zip(feature_extraction_cols, padded_feature_extraction)},
#         **{col: val for col, val in zip(descriptors_cols, padded_descriptors)},
#         'Carbon Values': row['Carbon Values']
#     }

#     # Append the new row to the list of new rows
#     new_rows.append(new_row_data)

# # Create a new DataFrame from the new rows
# new_df = pd.DataFrame(new_rows)

# print(new_df)


In [219]:
# Rearrange columns
df = df[['HST'] + feature_extraction_cols + descriptors_cols + ['Carbon Values']]

In [220]:
# Save DataFrame to CSV
file_name = 'dataframe_sift_100_30.csv'
df.to_csv(file_name, index=False)

In [221]:
# file_name = 'Dataframe_skripsi_100m_trial.xlsx'
# df.to_excel(file_name)
# print('DataFrame is written to CSV File successfully.')

In [222]:
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Upload the Excel file to a specific folder in Google Drive

file_path = '/content/' + file_name  # Assuming the file is in the Colab environment
folder_id = '1smZBhMpve1B0sWQ9yV3lbx7I9i71nXzy'  # Replace 'YOUR_FOLDER_ID' with the actual folder ID
file = drive.CreateFile({'title': file_name, 'parents': [{'id': folder_id}]})
file.SetContentFile(file_path)
file.Upload()

# print(f'{file_name} is uploaded to Google Drive folder successfully.')
