In [None]:
import pandas as pd
import os

# Locations of the label CSVs and of the folders holding the corresponding images.
train_csv = 'data/train.csv'
test_csv = 'data/test.csv'
train_images_path = 'data/train_ims/'
test_images_path = 'data/test_ims/'

# Read the training labels and show the first rows as a quick sanity check.
train_df = pd.read_csv(train_csv)
print(train_df.head())

# Collect every image name listed in the CSV that has no file in the training folder.
missing_files = list(
    filter(
        lambda name: not os.path.exists(os.path.join(train_images_path, name)),
        train_df['im_name'],
    )
)
if len(missing_files) > 0:
    print("Missing files:", missing_files)


       im_name  label
0  00016cd.jpg      6
1  0001808.jpg      2
2  0002399.jpg      1
3  0003973.jpg      3
4  00061cc.jpg      4


In [2]:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.transform import resize

def load_and_preprocess_images(df, folder):
    """Read every image listed in ``df`` from ``folder`` and return images and labels.

    Despite the name, no preprocessing is applied here: each image is returned
    exactly as ``imread`` produced it.

    Args:
        df: DataFrame with an ``im_name`` column (image file names) and a
            ``label`` column.
        folder: Directory containing the image files. A trailing path
            separator is optional because the path is built with
            ``os.path.join``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays, in the row order of ``df``.
        NOTE(review): ``np.array(images)`` presumably relies on all images
        sharing one shape -- confirm for this dataset.

    Raises:
        KeyError: If ``df`` lacks the ``im_name`` or ``label`` column.
    """
    images = []
    labels = []
    # Walk the two columns directly; this avoids building a Series per row as
    # DataFrame.iterrows() does.
    for im_name, label in zip(df['im_name'], df['label']):
        # os.path.join avoids the doubled separator that plain string
        # concatenation produces when ``folder`` already ends with "/".
        images.append(imread(os.path.join(folder, im_name)))
        labels.append(label)
    return np.array(images), np.array(labels)

# Load every training image listed in train_df, together with its label, into memory.
train_data, train_labels = load_and_preprocess_images(train_df, train_images_path)


In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.transform import resize

class HOGFeatureExtractor(BaseEstimator, TransformerMixin):
    """Scikit-learn transformer that turns images into HOG feature vectors.

    Args:
        resize_shape: Target shape handed to ``resize`` before feature
            extraction; a falsy value skips resizing.
        orientations: Forwarded to ``hog`` as ``orientations``.
        pixels_per_cell: Forwarded to ``hog`` as ``pixels_per_cell``.
        cells_per_block: Forwarded to ``hog`` as ``cells_per_block``.
    """

    def __init__(self, resize_shape=(64, 64), orientations=12, pixels_per_cell=(8, 8), cells_per_block=(4, 4)):
        self.resize_shape = resize_shape
        self.orientations = orientations
        self.pixels_per_cell = pixels_per_cell
        self.cells_per_block = cells_per_block

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def _describe(self, img):
        """Compute the HOG descriptor of one image."""
        # Arrays with three dimensions are converted to grayscale first.
        gray = rgb2gray(img) if img.ndim == 3 else img
        if self.resize_shape:
            gray = resize(gray, self.resize_shape, anti_aliasing=True)
        return hog(
            gray,
            orientations=self.orientations,
            pixels_per_cell=self.pixels_per_cell,
            cells_per_block=self.cells_per_block,
            block_norm='L2-Hys',
            visualize=False,
        )

    def transform(self, X):
        """Return an array holding one HOG descriptor per image in ``X``."""
        return np.array([self._describe(img) for img in X])


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Baseline model: HOG descriptors fed straight into an RBF-kernel SVM.
pipeline = Pipeline(steps=[
    ('hog', HOGFeatureExtractor()),
    ('svm', SVC(C=1.0, kernel='rbf', gamma='scale')),
])


In [18]:

# Fit whichever `pipeline` is currently bound in the kernel on the training images and labels.
# NOTE(review): the recorded execution counts of these cells are out of order; re-run the
# notebook top to bottom on a fresh kernel to confirm which pipeline this trains.
pipeline.fit(train_data, train_labels)
print("Model training completed.")


Model training completed.


In [12]:
# Read the test CSV and load the images it lists.
# NOTE(review): load_and_preprocess_images reads a 'label' column, so test.csv presumably
# carries one (likely a placeholder) -- confirm. test_labels is not used by the cells visible here.
test_df = pd.read_csv(test_csv)
test_data, test_labels = load_and_preprocess_images(test_df, test_images_path)


In [21]:
# Predict a label for every test image with the currently fitted pipeline.
# NOTE(review): predict_data is not referenced again in the cells visible here.
predict_data = pipeline.predict(test_data)

In [36]:
import pandas as pd
import numpy as np
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.transform import resize
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier

# Second model: HOG features are standardised, reduced with PCA (n_components=0.95),
# and classified with an RBF-kernel SVM.
pipeline = Pipeline(steps=[
    ('hog', HOGFeatureExtractor()),
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.95)),
    ('svm', SVC(C=10, kernel='rbf', gamma='auto')),
])

# Train on the training set, then label the test images with the fitted pipeline.
test_predictions = pipeline.fit(train_data, train_labels).predict(test_data)

# Overwrite the label column with the predictions and write the result to CSV.
test_df['label'] = test_predictions
test_df.to_csv('updated_test_predictions2.csv', index=False)

In [None]:
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier

# Third model: HOG with 9 orientations and 3x3-cell blocks, standardised, reduced with
# PCA (n_components=0.99), and classified with an RBF-kernel SVM.
pipeline = Pipeline([
    ('hog', HOGFeatureExtractor(orientations=9, pixels_per_cell=(8, 8), cells_per_block=(3, 3))),
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.99)),
    ('svm', SVC(kernel='rbf', C=10, gamma='auto')),
])

# Train the classifier
pipeline.fit(train_data, train_labels)

# Make predictions on the test set
test_predictions = pipeline.predict(test_data)

# Keep the output path in one place so the saved file and the confirmation message
# cannot drift apart (the message previously named a different file).
# NOTE(review): the '_rf' suffix suggests a random forest, but this pipeline ends in an SVC.
output_csv = 'updated_test_predictions_rf.csv'
test_df['label'] = test_predictions
test_df.to_csv(output_csv, index=False)
print(f"Predictions saved to '{output_csv}'")

Predictions saved to 'updated_test_predictions_rf.csv'


In [2]:
from collections import defaultdict, deque

def lfu_page_replacement(page_references, num_frames):
    """Simulate least-frequently-used (LFU) page replacement and count page faults.

    A page's use count starts at 1 when it is loaded, grows by 1 on every hit,
    and is discarded when the page is evicted. On a fault with all frames full,
    the resident page with the lowest count is evicted; ties go to the page
    that has been resident longest. The state after every reference is printed.

    Args:
        page_references: Iterable of hashable page identifiers, in access order.
        num_frames: Number of physical frames. With 0 frames nothing can be
            cached, so every reference is counted as a fault.

    Returns:
        The total number of page faults.

    Raises:
        ValueError: If ``num_frames`` is negative.
    """
    if num_frames < 0:
        raise ValueError("num_frames must be non-negative")

    # Resident pages in load order (oldest first). This ordering doubles as the
    # tie-breaker, so no separate queue is needed.
    frames = []
    # Use count of each resident page; its keys are exactly the pages in `frames`.
    frequency = {}
    page_faults = 0

    for page in page_references:
        if page in frequency:
            frequency[page] += 1
        else:
            page_faults += 1
            if num_frames > 0:
                if len(frames) >= num_frames:
                    # min() returns the first minimal element, i.e. the
                    # longest-resident page among the least-used ones.
                    lfu_page = min(frames, key=frequency.__getitem__)
                    frames.remove(lfu_page)
                    del frequency[lfu_page]
                frames.append(page)
                frequency[page] = 1

        print(f"Page reference: {page}, Frames: {frames}, Page Faults: {page_faults}")

    return page_faults

# Demo: run the LFU simulation on a sample reference string with 4 frames and report the fault total.
page_references = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3, 2, 3, 8, 4]
num_frames = 4
total_page_faults = lfu_page_replacement(page_references, num_frames)
print(f"Total Page Faults: {total_page_faults}")

Page reference: 3, Frames: [3], Page Faults: 1
Page reference: 1, Frames: [3, 1], Page Faults: 2
Page reference: 4, Frames: [3, 1, 4], Page Faults: 3
Page reference: 1, Frames: [3, 1, 4], Page Faults: 3
Page reference: 5, Frames: [3, 1, 4, 5], Page Faults: 4
Page reference: 9, Frames: [1, 4, 5, 9], Page Faults: 5
Page reference: 2, Frames: [1, 5, 9, 2], Page Faults: 6
Page reference: 6, Frames: [1, 9, 2, 6], Page Faults: 7
Page reference: 5, Frames: [1, 2, 6, 5], Page Faults: 8
Page reference: 3, Frames: [1, 6, 5, 3], Page Faults: 9
Page reference: 5, Frames: [1, 6, 5, 3], Page Faults: 9
Page reference: 8, Frames: [1, 5, 3, 8], Page Faults: 10
Page reference: 9, Frames: [1, 5, 8, 9], Page Faults: 11
Page reference: 7, Frames: [1, 5, 9, 7], Page Faults: 12
Page reference: 9, Frames: [1, 5, 9, 7], Page Faults: 12
Page reference: 3, Frames: [1, 5, 9, 3], Page Faults: 13
Page reference: 2, Frames: [1, 5, 9, 2], Page Faults: 14
Page reference: 3, Frames: [1, 5, 9, 3], Page Faults: 15
Page r

In [4]:
def clock_page_replacement(page_references, num_frames):
    """Simulate clock page replacement and count page faults.

    Frames start out holding -1 with their reference bit cleared. A hit sets
    the page's reference bit. On a fault the hand advances past frames whose
    bit is 1, clearing each bit as it goes, until it reaches a frame whose bit
    is 0; the new page is stored there with its bit set, and the hand moves on
    by one. The state after every reference is printed.

    Args:
        page_references: Iterable of page identifiers, in access order.
        num_frames: Number of physical frames.

    Returns:
        The total number of page faults.
    """
    frames = [-1 for _ in range(num_frames)]
    reference_bits = [0 for _ in range(num_frames)]
    hand = 0
    page_faults = 0

    for page in page_references:
        if page not in frames:
            page_faults += 1
            # Give a second chance to every frame whose bit is set.
            while reference_bits[hand] == 1:
                reference_bits[hand] = 0
                hand = (hand + 1) % num_frames
            frames[hand], reference_bits[hand] = page, 1
            hand = (hand + 1) % num_frames
        else:
            # Hit: mark the page as recently referenced.
            reference_bits[frames.index(page)] = 1

        print(f"Page reference: {page}, Frames: {frames}, Page Faults: {page_faults}, Reference Bits: {reference_bits}")

    return page_faults

# Demo: run the clock simulation on a sample reference string with 4 frames and report the fault total.
# Note: this rebinds the module-level names page_references, num_frames and total_page_faults.
page_references = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3, 2, 3, 8, 4]
num_frames = 4
total_page_faults = clock_page_replacement(page_references, num_frames)
print(f"Total Page Faults: {total_page_faults}")

Page reference: 3, Frames: [3, -1, -1, -1], Page Faults: 1, Reference Bits: [1, 0, 0, 0]
Page reference: 1, Frames: [3, 1, -1, -1], Page Faults: 2, Reference Bits: [1, 1, 0, 0]
Page reference: 4, Frames: [3, 1, 4, -1], Page Faults: 3, Reference Bits: [1, 1, 1, 0]
Page reference: 1, Frames: [3, 1, 4, -1], Page Faults: 3, Reference Bits: [1, 1, 1, 0]
Page reference: 5, Frames: [3, 1, 4, 5], Page Faults: 4, Reference Bits: [1, 1, 1, 1]
Page reference: 9, Frames: [9, 1, 4, 5], Page Faults: 5, Reference Bits: [1, 0, 0, 0]
Page reference: 2, Frames: [9, 2, 4, 5], Page Faults: 6, Reference Bits: [1, 1, 0, 0]
Page reference: 6, Frames: [9, 2, 6, 5], Page Faults: 7, Reference Bits: [1, 1, 1, 0]
Page reference: 5, Frames: [9, 2, 6, 5], Page Faults: 7, Reference Bits: [1, 1, 1, 1]
Page reference: 3, Frames: [9, 2, 6, 3], Page Faults: 8, Reference Bits: [0, 0, 0, 1]
Page reference: 5, Frames: [5, 2, 6, 3], Page Faults: 9, Reference Bits: [1, 0, 0, 1]
Page reference: 8, Frames: [5, 8, 6, 3], Page F

To determine how many disk blocks are required to store the block allocation table for a 64 GiB thumb-drive with a block size of 4 KiB and 32-bit disk block pointers, we need to follow these steps:

Calculate the total number of disk blocks on the thumb-drive:

Total storage capacity: 64 GiB
Disk block size: 4 KiB
$$
\text{Total number of disk blocks} = \frac{\text{Total storage capacity}}{\text{Disk block size}}
$$
$$
\text{Total number of disk blocks} = \frac{64 \times 2^{30} \text{ bytes}}{4 \times 2^{10} \text{ bytes}}
$$
$$
\text{Total number of disk blocks} = \frac{64 \times 2^{30}}{4 \times 2^{10}} = \frac{64 \times 2^{20}}{4} = 16 \times 2^{20} = 2^{24}
$$
So, the total number of disk blocks is $2^{24}$.

Calculate the size of the block allocation table:

Each entry in the block allocation table is 32 bits (4 bytes).
The block allocation table needs one entry for each disk block.
$$
\text{Size of the block allocation table} = \text{Total number of disk blocks} \times \text{Size of each entry}
$$
$$
\text{Size of the block allocation table} = 2^{24} \times 4 \text{ bytes} = 2^{24} \times 2^{2} \text{ bytes} = 2^{26} \text{ bytes}
$$

Calculate the number of disk blocks required to store the block allocation table:
Disk block size: 4 KiB (which is $2^{12}$ bytes)
$$
\text{Number of disk blocks required} = \frac{\text{Size of the block allocation table}}{\text{Disk block size}}
$$
$$
\text{Number of disk blocks required} = \frac{2^{26} \text{ bytes}}{2^{12} \text{ bytes}} = 2^{14}
$$
So, the number of disk blocks required to store the block allocation table is $2^{14}$.

Therefore, the number of disk blocks required to store the block allocation table is $2^{14} = 16{,}384$ blocks.

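To determine how many disk blocks are required to store the data block bitmap for the same 64 GiB thumb-drive with a block size of 4 KiB, we follow similar steps:

Calculate the total number of disk blocks on the thumb-drive: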

Total storage capacity: 64 GiB
Disk block size: 4 KiB
$$
\text{Total number of disk blocks} = \frac{\text{Total storage capacity}}{\text{Disk block size}}
$$
$$
\text{Total number of disk blocks} = \frac{64 \times 2^{30} \text{ bytes}}{4 \times 2^{10} \text{ bytes}}
$$
$$
\text{Total number of disk blocks} = \frac{64 \times 2^{30}}{4 \times 2^{10}} = \frac{64 \times 2^{20}}{4} = 16 \times 2^{20} = 2^{24}
$$
So, the total number of disk blocks is $2^{24}$.

Calculate the size of the data block bitmap:

Each bit in the bitmap represents one disk block.
The total number of bits required is equal to the total number of disk blocks.
$$
\text{Size of the data block bitmap} = \text{Total number of disk blocks} \times 1 \text{ bit}
$$
$$
\text{Size of the data block bitmap} = 2^{24} \text{ bits}
$$
Convert the size of the data block bitmap to bytes:

There are 8 bits in a byte.
$$
\text{Size of the data block bitmap in bytes} = \frac{\text{Size of the data block bitmap in bits}}{8}
$$
$$
\text{Size of the data block bitmap in bytes} = \frac{2^{24} \text{ bits}}{2^{3} \text{ bits per byte}} = 2^{21} \text{ bytes}
$$
Calculate the number of disk blocks required to store the data block bitmap:

Disk block size: 4 KiB (which is $2^{12}$ bytes)
$$
\text{Number of disk blocks required} = \frac{\text{Size of the data block bitmap in bytes}}{\text{Disk block size}}
$$
$$
\text{Number of disk blocks required} = \frac{2^{21} \text{ bytes}}{2^{12} \text{ bytes}} = \frac{2^{21}}{2^{12}} = 2^{9}
$$
So, the number of disk blocks required to store the data block bitmap is $2^{9}$.

Therefore, the number of disk blocks required to store the data block bitmap is $2^{9} = 512$ blocks.

