In [1]:
# Import the main libraries for machine learning

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import seaborn as sns
import os

**Load the Triple-MNIST images from the train/test/val folders**

In [4]:
import os
from PIL import Image

# Define paths to your dataset.
# The dataset root defaults to the original download location. Set the
# TRIPLE_MNIST_DIR environment variable to point the notebook at a copy of the
# dataset stored elsewhere, without editing this cell.
base_path = os.environ.get('TRIPLE_MNIST_DIR') or '/Users/msalah/Downloads/triple_mnist/'
# Each split lives in its own sub-directory of the dataset root.
train_path = os.path.join(base_path, 'train')
test_path = os.path.join(base_path, 'test')
val_path = os.path.join(base_path, 'val')

def load_images_from_folder(folder_path):
    """Load every image under ``folder_path`` together with its integer class label.

    ``folder_path`` is expected to contain one sub-directory per class, each
    named with the integer label of that class. Entries that are not
    directories are ignored, and so are files whose name does not end in
    ``.jpg`` or ``.png`` (compared case-insensitively).

    Args:
        folder_path: Directory holding the per-class sub-directories.

    Returns:
        A tuple ``(images, labels)`` of two lists of equal length. ``images``
        holds one numpy array per successfully loaded file and ``labels`` holds
        the ``int`` label of the class folder that file was found in.
    """
    images = []
    labels = []
    # Sorted traversal keeps the sample order reproducible between runs;
    # os.listdir on its own returns entries in arbitrary order.
    for class_folder in sorted(os.listdir(folder_path)):
        class_path = os.path.join(folder_path, class_folder)
        if not os.path.isdir(class_path):
            continue
        # Parse the label once per folder, before any image is stored. Doing it
        # after appending the image would leave `images` longer than `labels`
        # whenever a folder name is not an integer.
        try:
            label = int(class_folder)
        except ValueError:
            print(f"Skipping folder with non-numeric name: {class_path}")
            continue
        for img_file in sorted(os.listdir(class_path)):
            if not img_file.lower().endswith(('.jpg', '.png')):
                continue
            img_path = os.path.join(class_path, img_file)
            try:
                # The context manager releases the file handle as soon as the
                # pixel data has been copied into the numpy array.
                with Image.open(img_path) as img:
                    pixels = np.array(img)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error loading image: {img_path}. {e}")
                continue
            # Append both together so the two lists always stay aligned.
            images.append(pixels)
            labels.append(label)
    return images, labels

# Read the three dataset splits from disk; each call returns (images, labels) as Python lists
x_train, y_train = load_images_from_folder(train_path)
x_test, y_test = load_images_from_folder(test_path)
x_val, y_val = load_images_from_folder(val_path)

# Turn the image lists into numpy arrays so they can be sliced and inspected as one block
x_train = np.asarray(x_train)
x_test = np.asarray(x_test)
x_val = np.asarray(x_val)

# Same conversion for the label lists
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
y_val = np.asarray(y_val)

# Sanity check: report the array shapes of every split
print(f"Train images shape: {x_train.shape}, Train labels shape: {y_train.shape}")
print(f"Test images shape: {x_test.shape}, Test labels shape: {y_test.shape}")
print(f"Val images shape: {x_val.shape}, Val labels shape: {y_val.shape}")


Train images shape: (64000, 84, 84), Train labels shape: (64000,)
Test images shape: (20000, 84, 84), Test labels shape: (20000,)
Val images shape: (16000, 84, 84), Val labels shape: (16000,)


In [5]:


# Discover y_train: list the distinct class labels present in the training split.
# np.unique returns the unique values in sorted order.
distinct_labels = np.unique(y_train)
print(distinct_labels)


[  0   4   5   6   7   9  11  13  15  18  20  21  22  23  24  26  27  28
  30  31  32  33  34  35  36  37  38  40  41  42  43  45  48  49  50  52
  54  55  57  59  61  62  63  64  66  70  71  72  74  75  78  79  80  82
  84  85  89  90  91  93  94  95  97 100 101 102 103 104 105 107 110 114
 115 116 117 118 119 120 121 122 124 125 127 128 131 132 134 138 142 145
 147 148 150 151 153 155 156 157 159 160 161 162 163 164 165 166 167 169
 170 171 172 174 175 177 178 179 181 182 183 184 185 188 189 190 191 192
 195 196 199 200 201 202 203 204 205 209 210 211 212 216 217 219 221 223
 226 227 228 229 230 231 232 233 234 235 236 237 239 240 241 242 243 245
 246 248 249 250 252 254 256 258 260 261 263 264 266 267 268 269 270 272
 273 274 275 276 277 278 279 280 282 283 284 285 287 288 289 291 292 294
 295 297 298 299 300 302 303 306 307 308 309 310 311 313 314 316 317 318
 320 326 327 328 329 331 332 333 335 336 338 343 344 345 346 347 348 349
 351 352 353 356 362 363 366 367 368 369 370 372 37

In [6]:
 # Discover y_test: list the distinct class labels present in the test split.
 # Note: this rebinds `distinct_labels`, replacing the value from the previous cell.
distinct_labels = np.unique(y_test)
print(distinct_labels)

[  2   3   8  14  16  17  39  46  47  51  56  58  60  65  67  68  73  76
  77  86  87  88  96  98  99 106 109 111 113 123 129 135 139 140 141 146
 154 158 176 180 186 193 198 206 208 213 214 222 224 244 253 255 257 259
 262 265 271 290 296 301 304 305 315 319 321 322 323 339 340 342 354 357
 358 359 360 364 365 371 377 380 382 385 390 394 401 409 411 412 418 419
 420 424 428 433 434 438 440 451 460 465 467 471 472 473 480 481 484 486
 489 490 493 496 497 504 510 513 515 530 537 539 544 552 555 559 569 575
 576 582 588 595 596 608 615 628 630 631 635 638 645 647 661 665 680 681
 695 717 720 737 742 745 755 759 762 763 765 766 768 780 781 782 790 806
 812 815 817 819 823 824 826 836 837 842 843 846 860 862 865 866 867 873
 878 884 885 895 899 907 919 929 942 945 951 954 956 957 964 976 988 990
 996 998]


In [7]:
# Discover y_val: list the distinct class labels present in the validation split.
# Note: this rebinds `distinct_labels`, replacing the value from the previous cell.
distinct_labels = np.unique(y_val)
print(distinct_labels)

[  1  10  12  19  25  29  44  53  69  81  83  92 108 112 126 130 133 136
 137 143 144 149 152 168 173 187 194 197 207 215 218 220 225 238 247 251
 281 286 293 312 324 325 330 334 337 341 350 355 361 383 393 400 405 407
 410 413 422 423 432 436 439 441 442 454 459 464 466 475 477 488 494 495
 501 502 503 506 507 516 519 535 540 550 557 562 570 572 573 574 579 583
 589 592 593 601 602 604 609 611 612 622 627 629 634 636 639 649 651 658
 673 679 686 697 703 707 715 718 726 731 740 741 746 750 772 775 779 789
 794 796 816 829 841 847 851 852 855 879 880 886 888 889 890 900 904 906
 910 911 912 915 926 940 944 955 961 962 963 975 980 981 992 999]
