<a href="https://colab.research.google.com/github/basry87878/Big_Data_Partitioning_461/blob/main/Big_Data_Partitioning_461.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Colab-only setup: mount Google Drive at /content/drive so the CSV path used
# below can be read like a local file.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
# NOTE(review): `np` is not referenced in the cells visible here — confirm it is
# still needed before removing.
import numpy as np

# --------------------------
# Load data
# --------------------------
# Path inside the mounted Drive; adjust it if heart.csv is stored elsewhere.
file = '/content/drive/My Drive/heart.csv'
# read_csv is called with defaults only, so no column is used as the row index.
data = pd.read_csv(file)
print("Data shape:", data.shape)
# Kept as the last expression of the cell so the notebook displays the first rows.
data.head()



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Data shape: (303, 14)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# --------------------------
# Compute range boundaries
# --------------------------
# Observed age span, cut into four equal-width intervals.
age_column = data['age']
minAge, maxAge = age_column.min(), age_column.max()
diff = maxAge - minAge

# Width of one interval; each boundary is the previous boundary plus this step,
# so range4 lands on maxAge (up to floating-point rounding).
quarter_span = diff / 4
range1 = minAge + quarter_span
range2 = range1 + quarter_span
range3 = range2 + quarter_span
range4 = range3 + quarter_span

print("Age ranges:", minAge, range1, range2, range3, range4)



Age ranges: 29 41.0 53.0 65.0 77.0


In [4]:
# --------------------------
# Partitioning
# --------------------------

# Range Partitioning
# Half-open age intervals [lower, upper): the first partition has no lower
# bound and the last one has no upper bound, so every row lands in exactly one.
age_values = data['age']
rangePartition1 = data.loc[age_values.lt(range1)]
rangePartition2 = data.loc[age_values.ge(range1) & age_values.lt(range2)]
rangePartition3 = data.loc[age_values.ge(range2) & age_values.lt(range3)]
rangePartition4 = data.loc[age_values.ge(range3)]

# List Partitioning (based on target)
# One partition per listed target value: 0 first, then 1.
listPartition1, listPartition2 = (
    data.loc[data['target'].eq(label)] for label in (0, 1)
)

# Hash Partitioning (based on age % 4)
# The remainder 0..3 selects hashPartition1..hashPartition4 respectively.
age_bucket = data['age'].mod(4)
hashPartition1, hashPartition2, hashPartition3, hashPartition4 = (
    data.loc[age_bucket.eq(remainder)] for remainder in range(4)
)


In [5]:
# --------------------------
# Function to compute iterations to find a specific element
# --------------------------
def count_iterations(df, column, target_value):
    """Count how many values of ``df[column]`` are visited before a match.

    The values are scanned in order and the scan stops at the first one equal
    to ``target_value``.

    Args:
        df: Any object whose ``df[column]`` is iterable (e.g. a DataFrame).
        column: Key of the column to scan.
        target_value: Value to look for.

    Returns:
        The 1-based position of the first match. If nothing matches, the total
        number of values scanned (0 for an empty column) — so a miss cannot be
        told apart from a match in the last position.
    """
    visited = 0
    for visited, candidate in enumerate(df[column], start=1):
        if candidate == target_value:
            break
    return visited

target_age = 77  # example


def _iterations_across(partitions, column, value):
    """Sum count_iterations over the partitions that contain ``value``.

    Partitions without the value are skipped and contribute nothing; note that
    the membership check itself is not counted as iterations.
    """
    total = 0
    for part in partitions:
        if value in part[column].values:
            total += count_iterations(part, column, value)
    return total


# Search in full dataset
iterations_original = count_iterations(data, "age", target_age)

# Search in each partition type
# Range
range_partitions = [rangePartition1, rangePartition2, rangePartition3, rangePartition4]
range_iterations = _iterations_across(range_partitions, "age", target_age)

# List
list_partitions = [listPartition1, listPartition2]
list_iterations = _iterations_across(list_partitions, "age", target_age)

# Hash
hash_partitions = [hashPartition1, hashPartition2, hashPartition3, hashPartition4]
hash_iterations = _iterations_across(hash_partitions, "age", target_age)

# --------------------------
# Compare results
# --------------------------
print("\n=== Iterations to find age =", target_age, "===")
print("Original (no partition):", iterations_original)
print("Range partitioning:", range_iterations)
print("List partitioning:", list_iterations)
print("Hash partitioning:", hash_iterations)




=== Iterations to find age = 77 ===
Original (no partition): 239
Range partitioning: 32
List partitioning: 74
Hash partitioning: 55


In [6]:
# --------------------------
# Append a new record example
# --------------------------
new_record = {'age': 55, 'sex': 1, 'cp': 2, 'trestbps': 120, 'chol': 240,
              'fbs': 0, 'restecg': 1, 'thalach': 160, 'exang': 0,
              'oldpeak': 2.3, 'slope': 2, 'ca': 0, 'thal': 2, 'target': 1}

# Build the one-row frame once and label it with the next unused row label of
# `data`. A bare pd.DataFrame([new_record]) is labelled 0, and pd.concat keeps
# that label, so it can collide with the original row 0 inside a partition.
new_row = pd.DataFrame([new_record], index=[data.index.max() + 1])


def _place_record(partitions, slot, row):
    """Return copies of ``partitions`` with ``row`` stored only in ``partitions[slot]``.

    Any earlier copy of the row (same index label) is dropped from every
    partition first, so re-running this cell does not append duplicates and a
    changed record does not linger in its previous partition.
    """
    placed = [part.drop(index=row.index, errors="ignore") for part in partitions]
    placed[slot] = pd.concat([placed[slot], row])
    return placed


# Append to range partition
# The number of boundaries at or below the age is the partition position:
# age < range1 -> 0, range1 <= age < range2 -> 1, ..., age >= range3 -> 3.
range_slot = int(sum(new_record['age'] >= bound for bound in (range1, range2, range3)))
rangePartition1, rangePartition2, rangePartition3, rangePartition4 = _place_record(
    [rangePartition1, rangePartition2, rangePartition3, rangePartition4],
    range_slot, new_row)

# Append to list partition
# Only the listed target values own a partition; anything else is rejected
# instead of being silently filed under the `target == 1` partition.
list_values = (0, 1)
if new_record['target'] not in list_values:
    raise ValueError(
        f"target {new_record['target']!r} does not belong to any list partition "
        f"(expected one of {list_values})")
listPartition1, listPartition2 = _place_record(
    [listPartition1, listPartition2],
    list_values.index(new_record['target']), new_row)

# Append to hash partition
# Same hash as the partition definitions: age % 4 gives 0..3.
partition_index = new_record['age'] % 4
hashPartition1, hashPartition2, hashPartition3, hashPartition4 = _place_record(
    [hashPartition1, hashPartition2, hashPartition3, hashPartition4],
    partition_index, new_row)

print("\n✅ New record appended successfully to all partition types.")


✅ New record appended successfully to all partition types.
