# Mars Images

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import math

## Load Data

In [2]:
folder_path = "calibrated"

# Get a list of all files in the folder
file_list = os.listdir(folder_path)

# Filter the list to keep only files with certain extensions (e.g., .jpg, .png)
image_files = [file for file in file_list if file.lower().endswith(('.jpg', '.png'))]

# Load images into Python
images = [Image.open(os.path.join(folder_path, file)) for file in image_files]

In [3]:
labels = pd.read_fwf('msl_synset_words-indexed.txt', sep=' ', header=None)
labels.rename(columns={0:'Label', 1:'Description'}, inplace=True)

In [4]:
train_shuffled = pd.read_fwf('train-calibrated-shuffled.txt', sep=' ', header=None)
train_shuffled.rename(columns={0:'Filename', 1:'Label'}, inplace=True)
train_shuffled['Filename'] = train_shuffled['Filename'].str[11:]

In [5]:
val_shuffled = pd.read_fwf('val-calibrated-shuffled.txt', sep=' ', header=None)
val_shuffled.rename(columns={0:'Filename', 1:'Label'}, inplace=True)
val_shuffled['Filename'] = val_shuffled['Filename'].str[11:]

In [6]:
test_shuffled = pd.read_fwf('test-calibrated-shuffled.txt', sep=' ', header=None)
test_shuffled.rename(columns={0:'Filename', 1:'Label'}, inplace=True)
test_shuffled['Filename'] = test_shuffled['Filename'].str[11:]

In [7]:
filenames = pd.DataFrame(os.listdir('calibrated'))
filenames.rename(columns={0:'Filename'}, inplace=True)

## Assign Labels to Images

In [8]:
filenames =filenames.merge(train_shuffled, how='left', on='Filename')

dfs = [val_shuffled, test_shuffled]
for df in dfs:
    for val in df['Filename']:
        i = 0
        j = df.index[df['Filename'] == val]
        while i < len(filenames):
            if val == filenames['Filename'][i]:
                filenames['Label'][i] = df['Label'][j]
            i += 1
            
filenames

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filenames['Label'][i] = df['Label'][j]


Unnamed: 0,Filename,Label
0,0003ML0000000110100031E01_DRCL.JPG,9.0
1,0003ML0000000110100031I01_DRCL.JPG,9.0
2,0003ML0000000120100032E01_DRCL.JPG,9.0
3,0003ML0000000120100032I01_DRCL.JPG,9.0
4,0003ML0000000130100033E01_DRCL.JPG,9.0
...,...,...
6732,1048MR0046080000104543I01_DRCL.JPG,0.0
6733,1059ML0046560000306154E01_DRCL.JPG,4.0
6734,1059MR0046570000104690E01_DRCL.JPG,4.0
6735,1060ML0046630000306178E01_DRCL.JPG,4.0


In [10]:
filenames.isna().sum()

Filename     0
Label       46
dtype: int64

In [12]:
len(images)

6737

## Exploration

In [None]:
# Assuming 'images' is the list of PIL Image objects
num_images_to_display = 12

# Calculate the number of rows and columns for the subplot grid
num_rows = math.ceil(num_images_to_display / 4)
num_cols = min(num_images_to_display, 4)

# Create a 3x4 grid of subplots
fig, axes = plt.subplots(num_rows, num_cols, figsize=(12, 9))

# Flatten the axes array if it's more than 1D
axes = axes.flatten()

# Display the first few images in the subplots
for i in range(num_images_to_display):
    j = i*200
    if i < len(images):
        axes[i].imshow(images[j])
        axes[i].set_title(f"Image {j + 1}")

# Remove any empty subplots
for i in range(num_images_to_display, len(axes)):
    fig.delaxes(axes[i])

plt.tight_layout()
plt.show()