## Messy-vs-clean-room dataset (uncleaned)

In [3]:
import requests
import numpy as np
import io

# Download the room dataset (.npy) from GitHub and load it straight from memory.

# GitHub web ("blob") URL of the dataset file.
github_url = "https://github.com/GuanqiaoDing/messy-room-classifier/blob/master/data/room_dataset.npy"

# Convert the web URL to the raw-file URL. Only the first "/blob/" path segment
# is rewritten, so a "blob" substring in a user, repository or file name is
# left untouched.
raw_url = github_url.replace("/blob/", "/raw/", 1)

# requests applies no timeout unless one is given; without it a stalled
# connection would block this cell forever.
response = requests.get(raw_url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Wrap the payload in a BytesIO object so numpy can load it from memory
    # rather than saving it to a local file first.
    file_bytes = io.BytesIO(response.content)

    # NOTE(security): allow_pickle=True lets np.load unpickle objects, which can
    # execute arbitrary code from the downloaded file. It is kept because the
    # recorded result is a (2, 2) array whose entries are themselves arrays,
    # i.e. an object array that cannot be loaded without pickle. Only run this
    # against a source you trust.
    data = np.load(file_bytes, allow_pickle=True)

    print("Dataset loaded successfully!")
    print(f"Data shape: {data.shape}")
    print(f"Data type: {type(data)}")
    print(f"First few entries: {data[:5]}")
else:
    # Best-effort: report the failure; `data` is not assigned in this case.
    print(f"Failed to download the file. Status code: {response.status_code}")


Dataset loaded successfully!
Data shape: (2, 2)
Data type: <class 'numpy.ndarray'>
First few entries: [[array([[[[163, 182, 203],
           [163, 182, 203],
           [162, 181, 202],
           ...,
           [205, 210, 219],
           [205, 210, 219],
           [206, 211, 220]],

          [[163, 182, 203],
           [163, 182, 203],
           [162, 181, 202],
           ...,
           [206, 211, 220],
           [206, 211, 220],
           [208, 213, 222]],

          [[162, 181, 203],
           [163, 182, 203],
           [162, 181, 202],
           ...,
           [206, 211, 220],
           [206, 211, 220],
           [207, 212, 221]],

          ...,

          [[ 31,  32,  28],
           [ 32,  33,  29],
           [ 41,  42,  38],
           ...,
           [ 32,  37,  35],
           [ 32,  37,  36],
           [ 32,  37,  36]],

          [[ 30,  31,  27],
           [ 31,  32,  27],
           [ 35,  39,  33],
           ...,
           [ 32,  36,  34],
          

## Importing the MNIST dataset (cleaned)

In [2]:
import numpy as np
import urllib.request
import os
import pandas as pd

# Fetch the MNIST archive (.npz) and unpack its train/test arrays.
url = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz"
local_path = "mnist.npz"

try:
    # 1. Download the file from the URL, but only when no local copy exists,
    #    so re-running the cell does not fetch the archive again.
    if not os.path.exists(local_path):
        # Download under a temporary name and rename afterwards: an interrupted
        # transfer then never leaves a truncated file at local_path.
        partial_path = local_path + ".part"
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, local_path)
        print("Download complete.")

    # 2. Load the data using numpy.
    #    np.load on an .npz returns an archive object holding an open file
    #    handle; the `with` block closes it once the arrays are extracted.
    #    allow_pickle is left at its default (False) so a downloaded file
    #    cannot run code through unpickling. This assumes the archive holds
    #    only plain numeric arrays - TODO confirm; if it does not, the load
    #    error is reported by the handler below.
    with np.load(local_path) as data:
        # 3. Extract the data arrays from the loaded object
        xtrain = data['x_train']
        ytrain = data['y_train']
        xtest = data['x_test']
        ytest = data['y_test']

    print("Dataset successfully loaded.")

    # 4. Display shapes to verify
    print(f"x train shape: {xtrain.shape}")
    print(f"y train shape: {ytrain.shape}")
    print(f"x test shape: {xtest.shape}")
    print(f"y test labels shape: {ytest.shape}")

except Exception as e:
    # Best-effort: report the problem instead of raising; the arrays above
    # are not (re)assigned when this branch runs.
    print(f"An error occurred: {e}")

Download complete.
Dataset successfully loaded.
x train shape: (60000, 28, 28)
y train shape: (60000,)
x test shape: (10000, 28, 28)
y test labels shape: (10000,)
