# Age Category Prediction Using KNN
1. Project Objective: Predict the age category (e.g., child, teen, adult, senior) of individuals using the K-Nearest Neighbors (KNN) algorithm.

2. Input Features: Use features such as height, weight, activity levels, lifestyle habits, and demographic data.

3. Age Categories: Define clear categories, such as 0–12 (child), 13–19 (teen), 20–59 (adult), and 60+ (senior).

4. Data Collection: Collect a dataset with examples of individuals labeled with their correct age categories.

5. Preprocessing: Clean and normalize the data to ensure all features are on the same scale for KNN to work effectively.

6. KNN Algorithm: Use the KNN algorithm, which classifies a new data point by finding the majority category among its k nearest neighbors.

7. Parameter Tuning: Choose the best value of 
𝑘
k (number of neighbors) using cross-validation for optimal accuracy.

8. Model Evaluation: Test the model's performance using metrics like accuracy, precision, recall, and F1-score.

9. Visualization: Visualize the predictions using scatter plots or decision boundaries to explain the KNN model's workings.

10. Real-World Use: Apply the model to categorize ages for targeted services in healthcare, marketing, or education sectors.








In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [2]:
data=pd.read_csv('Train.csv')
data

Unnamed: 0,ID,Class
0,0.jpg,MIDDLE
1,1.jpg,OLD
2,3.jpg,MIDDLE
3,4.jpg,MIDDLE
4,5.jpg,YOUNG
...,...,...
495,645.jpg,YOUNG
496,646.jpg,MIDDLE
497,648.jpg,YOUNG
498,649.jpg,MIDDLE


In [3]:
data['Class'].replace(['YOUNG','MIDDLE','OLD'],[0,1,2],inplace=True)
data

Unnamed: 0,ID,Class
0,0.jpg,1
1,1.jpg,2
2,3.jpg,1
3,4.jpg,1
4,5.jpg,0
...,...,...
495,645.jpg,0
496,646.jpg,1
497,648.jpg,0
498,649.jpg,1


In [4]:
def read_img(path,ch=3,resize=(150,150)):
    di=tf.io.read_file(path)
    di=tf.image.decode_jpeg(di,channels=ch)
    ddi=tf.imge.convert_image_dtype(di,dtype=tf.flat32)
    di=tf.image.resize(di,resize)
    return di

In [5]:
def load_data(image_path,label):
    img=read_img(image_path,3,(150,150))
    return(img,label)

In [6]:
PATH='Train'
image_paths=[]

In [19]:
import os
import pandas as pd

# Example directory and DataFrame
PATH = "Train"
data = pd.DataFrame({
    'ID': ['file1', 'file2'],  # Example IDs
    'Class': ['cat', 'dog']   # Example classes
})

# Initialize lists
image_paths = []
response_list = []

# Build image paths
for file in os.listdir(PATH):
    full_path = os.path.join(PATH, file)
    image_paths.append(full_path)

# Process image paths
for i in image_paths:
    _, tail = os.path.split(i)
    tail = os.path.splitext(tail)[0]
    if tail not in data['ID'].values:
        print(f"ID '{tail}' not found in the DataFrame. Appending 'Unknown'.")
        response_list.append('Unknown')
    else:
        response = data.loc[data['ID'] == tail, 'Class'].values[0]
        response_list.append(response)

# Outputs
print("Image Paths:", image_paths)
print("Response List:", response_list)


ID '0' not found in the DataFrame. Appending 'Unknown'.
ID '1' not found in the DataFrame. Appending 'Unknown'.
ID '100' not found in the DataFrame. Appending 'Unknown'.
ID '101' not found in the DataFrame. Appending 'Unknown'.
ID '102' not found in the DataFrame. Appending 'Unknown'.
ID '103' not found in the DataFrame. Appending 'Unknown'.
ID '106' not found in the DataFrame. Appending 'Unknown'.
ID '107' not found in the DataFrame. Appending 'Unknown'.
ID '108' not found in the DataFrame. Appending 'Unknown'.
ID '109' not found in the DataFrame. Appending 'Unknown'.
ID '11' not found in the DataFrame. Appending 'Unknown'.
ID '111' not found in the DataFrame. Appending 'Unknown'.
ID '112' not found in the DataFrame. Appending 'Unknown'.
ID '114' not found in the DataFrame. Appending 'Unknown'.
ID '115' not found in the DataFrame. Appending 'Unknown'.
ID '116' not found in the DataFrame. Appending 'Unknown'.
ID '117' not found in the DataFrame. Appending 'Unknown'.
ID '118' not found 

In [8]:
train_size=int(0.9*(len(image_paths)))
test_size=int(0.1*(len(image_paths)))
train_size


450

In [9]:
train_set=tf.data.Dataset.from_tensor_slices((image_paths[:train_size][:train_size]))
test_set=tf.data.Dataset.from_tensor_slices((image_paths[train_size:][train_size:]))
len(train_set)
train_set

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [10]:
import tensorflow as tf

# Dummy dataset creation
raw_train_data = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4])
raw_test_data = tf.data.Dataset.from_tensor_slices([5, 6, 7, 8])

# Define read_data function
def read_data(x):
    # Example transformation: multiply by 2
    return x * 2

# Define load_data function
def load_data(x):
    # Example transformation: add 1
    return x + 1

# Create train_set
train_set = (
    raw_train_data.map(read_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

# Create test_set
test_set = (
    raw_test_data.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

# Convert to list
print("Train Set:", list(train_set))
print("Test Set:", list(test_set))


Train Set: [<tf.Tensor: shape=(4,), dtype=int32, numpy=array([2, 4, 6, 8])>]
Test Set: [<tf.Tensor: shape=(4,), dtype=int32, numpy=array([6, 7, 8, 9])>]


In [11]:

train_set = (
    train_set.map(read_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

test_set = (
    test_set.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

list(train_set)


[<tf.Tensor: shape=(1, 4), dtype=int32, numpy=array([[ 4,  8, 12, 16]])>]

In [12]:
from tensorflow.keras import layers, models

cnn = models.Sequential([
    layers.Conv2D(filters=30, kernel_size=(3, 3), activation='relu', input_shape=(150, 150, 3)),  # Fixed 'real' to 'relu'
    layers.MaxPooling2D(pool_size=(2, 2)),  # Corrected syntax for MaxPooling2D
    layers.Flatten(),
    layers.Dense(64, activation='relu'),  # Fixed 'real' to 'relu'
    layers.Dense(3, activation='sigmoid')  # Removed extra period
])


In [13]:
cnn.compile(optimizer='adam',loss='spanse_categorical_crossentrology',metrics=['accuracy'])
cnn.summary()

In [14]:
import tensorflow as tf

# Function to read and preprocess images
def preprocess_image(file_path):
    # Read the image
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image, channels=3)  # Decode as RGB
    image = tf.image.resize(image, [150, 150])  # Resize to match the model input
    image = image / 255.0  # Normalize pixel values to [0, 1]
    return image

# Example Dataset with Image Paths
train_image_paths = ["path_to_image1.jpg", "path_to_image2.jpg"]  # Replace with actual paths
train_set = tf.data.Dataset.from_tensor_slices(train_image_paths)
train_set = train_set.map(preprocess_image).batch(64).prefetch(tf.data.AUTOTUNE)

test_image_paths = ["path_to_image3.jpg", "path_to_image4.jpg"]  # Replace with actual paths
test_set = tf.data.Dataset.from_tensor_slices(test_image_paths)
test_set = test_set.map(preprocess_image).batch(64).prefetch(tf.data.AUTOTUNE)


In [15]:
def preprocess_image_with_label(file_path, label):
    image = preprocess_image(file_path)  # Use the earlier preprocess function
    return image, label

train_labels = [0, 1]  # Example labels
train_set = tf.data.Dataset.from_tensor_slices((train_image_paths, train_labels))
train_set = train_set.map(preprocess_image_with_label).batch(64).prefetch(tf.data.AUTOTUNE)


In [16]:
cnn.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # Replace with correct loss
    metrics=['accuracy']
)


In [24]:
import os
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Path to the folder containing the images
PATH = "Train"  # Replace with your actual folder path

# Function to preprocess an image
def preprocess_image(file_path):
    try:
        # Read and decode the image
        image = tf.io.read_file(file_path)
        image = tf.image.decode_jpeg(image, channels=3)
        # Resize the image to the target size
        image = tf.image.resize(image, [150, 150])
        return image
    except tf.errors.NotFoundError:
        print(f"File not found: {file_path}. Using a placeholder image.")
        return tf.zeros([150, 150, 3])  # Return a dummy image if file is missing

# Function to load data with labels
def load_data(file_path, label):
    image = preprocess_image(file_path)
    return image, label

# Generate dataset paths and labels
image_paths = []
labels = []  # Replace this with actual labels, e.g., from a CSV or DataFrame

if os.path.exists(PATH):
    for file_name in os.listdir(PATH):
        full_path = os.path.join(PATH, file_name)
        image_paths.append(full_path)
        # Assuming file names contain labels (e.g., "cat_1.jpg")
        labels.append(file_name.split("_")[0])  # Adjust based on your label format
else:
    raise FileNotFoundError(f"The directory {PATH} does not exist.")

# Encode labels to integers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Create TensorFlow datasets
train_set = (
    tf.data.Dataset.from_tensor_slices((image_paths, encoded_labels))
    .map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

# Define a simple CNN model
cnn = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(len(set(encoded_labels)), activation="softmax")  # Number of classes
])

cnn.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model
cnn.fit(train_set, epochs=1)

# Predict on the train set (for testing purposes, use separate test data in real cases)
pred = cnn.predict(train_set)

# Convert predictions to lists
pred_list = [list(p) for p in pred]

# Access the first prediction if required
first_pred = pred_list[0]

# Outputs
print("Predictions List:", pred_list)
print("First Prediction:", first_pred)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 612ms/step - accuracy: 0.0000e+00 - loss: 2299.2903
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
Predictions List: [[0.0, 0.0, 3.0518918e-21, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.041922e-34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.529698e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.131186e-14, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.