In [8]:
import sys
import gc

# Memory allocation example
def demonstrate_memory_management():
    # Objects are allocated in private heap
    my_list = [1, 2, 3, 4, 5]  # Allocated in heap
    
    # Check object reference count
    ref_count = sys.getrefcount(my_list)
    print(f"Reference count: {ref_count}")
    
    # Memory usage
    memory_usage = sys.getsizeof(my_list)
    print(f"Memory usage: {memory_usage} bytes")

demonstrate_memory_management()

Reference count: 2
Memory usage: 104 bytes


In [9]:
import sys
import gc

# Memory allocation example
def demonstrate_memory_management():
    # Objects are allocated in private heap
    my_list = [1, 2, 3, 4,8]  # Allocated in heap
    
    # Check object reference count
    ref_count = sys.getrefcount(my_list)
    print(f"Reference count: {ref_count}")
    
    # Memory usage
    memory_usage = sys.getsizeof(my_list)
    print(f"Memory usage: {memory_usage} bytes")

demonstrate_memory_management()

Reference count: 2
Memory usage: 104 bytes


In [10]:
import sys

def reference_counting_demo():
    # Create object
    data = [1, 2, 3]
    print(f"Initial ref count: {sys.getrefcount(data)}")
    
    # Assign to another variable
    data2 = data
    print(f"After assignment: {sys.getrefcount(data)}")
    
    # Delete reference
    del data2
    print(f"After deletion: {sys.getrefcount(data)}")
reference_counting_demo()

Initial ref count: 2
After assignment: 3
After deletion: 2


In [None]:
import gc
import weakref

class MLModel:
    def __init__(self, name):
        self.name = name
        self.data = []

def garbage_collection_demo():
    # Create circular reference
    model1 = MLModel("Model1")
    model2 = MLModel("Model2")
    model1.partner = model2
    model2.partner = model1
    
    # Check garbage collection
    print(f"Objects before GC: {len(gc.get_objects())}")
    
    # Force garbage collection
    collected = gc.collect()
    print(f"Objects collected: {collected}")
    
    # Monitor object lifecycle
    def callback(ref):
        print("Object was garbage collected")
    
    weak_ref = weakref.ref(model1, callback)

garbage_collection_demo()

Objects before GC: 81478
Objects collected: 10


In [18]:
pip install memory-profiler


Collecting memory-profiler
  Using cached memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Using cached memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.61.0
Note: you may need to restart the kernel to use updated packages.


In [None]:

import numpy as np
import pandas as pd
from memory_profiler import profile

class MemoryEfficientMLWorkflow:
    def __init__(self):
        self.data = None
        self.model = None
    
    @profile
    def load_and_process_data(self, filepath):
        """Memory-efficient data loading"""
        # Use chunking for large datasets
        chunk_size = 10000
        chunks = []
        
        for chunk in pd.read_csv(filepath, chunksize=chunk_size):
            # Process chunk
            processed_chunk = self.preprocess_chunk(chunk)
            chunks.append(processed_chunk)
        
        # Combine chunks efficiently
        self.data = pd.concat(chunks, ignore_index=True)
        
        # Clear intermediate variables
        del chunks
        gc.collect()
    
    def preprocess_chunk(self, chunk):
        """Memory-efficient preprocessing"""
        # Use view instead of copy when possible
        numeric_columns = chunk.select_dtypes(include=[np.number])
        
        # Optimize data types
        chunk = self.optimize_dtypes(chunk)
        
        return chunk
    
    def optimize_dtypes(self, df):
        """Optimize pandas dtypes to reduce memory"""
        for col in df.columns:
            if df[col].dtype == 'int64':
                if df[col].min() >= 0 and df[col].max() <= 255:
                    df[col] = df[col].astype('uint8')
                elif df[col].min() >= -128 and df[col].max() <= 127:
                    df[col] = df[col].astype('int8')
            elif df[col].dtype == 'float64':
                df[col] = df[col].astype('float32')
        
        return df
    
    def __del__(self):
        """Cleanup when object is destroyed"""
        if hasattr(self, 'data') and self.data is not None:
            del self.data
        gc.collect()




In [20]:
from datetime import datetime

class ModelRegistry:
    """Example of dictionary usage in ML workflows"""
    
    def __init__(self):
        # Store multiple models with configurations
        self.models = {}
        self.metrics = {}
        self.feature_importance = {}
    
    def register_model(self, model_name, model, config):
        """Register model with its configuration"""
        self.models[model_name] = {
            'model_object': model,
            'config': config,
            'trained': False,
            'timestamp': datetime.now()
        }
    
    def store_metrics(self, model_name, metrics_dict):
        """Store evaluation metrics for a model"""
        self.metrics[model_name] = metrics_dict
    
    def get_best_model(self, metric='accuracy'):
        """Find best performing model based on metric"""
        if not self.metrics:
            return None
        
        best_model = max(
            self.metrics.items(),
            key=lambda x: x[1].get(metric, 0)
        )
        return best_model[0]  # Return model name
    
    def compare_models(self):
        """Compare all models using stored metrics"""
        comparison = {}
        for model_name, metrics in self.metrics.items():
            comparison[model_name] = {
                'accuracy': metrics.get('accuracy', 0),
                'precision': metrics.get('precision', 0),
                'recall': metrics.get('recall', 0),
                'f1_score': metrics.get('f1_score', 0)
            }
        return comparison

# Usage example
registry = ModelRegistry()

# Register models
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

rf_config = {'n_estimators': 100, 'max_depth': 10}
svm_config = {'C': 1.0, 'kernel': 'rbf'}

registry.register_model('random_forest', RandomForestClassifier(**rf_config), rf_config)
registry.register_model('svm', SVC(**svm_config), svm_config)

# Store metrics
registry.store_metrics('random_forest', {
    'accuracy': 0.85, 'precision': 0.83, 'recall': 0.87, 'f1_score': 0.85
})
registry.store_metrics('svm', {
    'accuracy': 0.82, 'precision': 0.80, 'recall': 0.84, 'f1_score': 0.82
})

In [21]:
text_data = ['Hello World!', 'Python ML', 'Data Science@']
cleaned_text = [
    ''.join([char.lower() for char in text if char.isalnum() or char.isspace()])
    for text in text_data
]
cleaned_text

['hello world', 'python ml', 'data science']

In [None]:
# Create polynomial features
    numeric_cols = ['age', 'income', 'credit_score']
    polynomial_features = [
        f"{col}_squared" for col in numeric_cols
    ]
    
    # Add polynomial features to dataframe
    for i, col in enumerate(numeric_cols):
        df[polynomial_features[i]] = df[col] ** 2
    
    # Create interaction features
    interaction_features = [
        f"{col1}_{col2}_interaction" 
        for i, col1 in enumerate(numeric_cols)
        for col2 in numeric_cols[i+1:]
    ]
    
    # Log transformations for skewed features
    skewed_features = ['income', 'credit_score']
    log_features = [f"log_{col}" for col in skewed_features]
    
    for i, col in enumerate(skewed_features):
        df[log_features[i]] = np.log1p(df[col])

NameError: name 'df' is not defined

In [2]:
import numpy as np

In [None]:
print(np.random.RandomState(42).normal(loc=0.0,scale=0.01,size=10)) # Example usage of RandomState


0.04480611116987562


In [41]:
import numpy as np

class Perceptron:
    def __init__(self,learning_rate=0.01,n_iter=500,random_state=1):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.random_state = random_state
        self.weights=None
        self.bias=None
        self.error=[]
        
    def fit(self,X,y):
        random_generator = np.random.RandomState(self.random_state)
        self.weights = random_generator.normal(loc=0.0,scale=0.01,size=X.shape[1])
        self.bias = 0.0
        self.errors_=[]
        # what this errors_ ( _ means private variable)
        for _ in range(self.n_iter):
            errors=0
            for xi,target in zip(X,y):
                predication = self.predict(xi)
                update = self.learning_rate * (target - predication)
                self.weights += update * xi
                self.bias+=update
                errors += int(update !=0.0) # 
            self.error.append(errors)
            print(f'errors in iteration {_+1} : {errors}')
        print(f'Final weights: {self.weights}, Final bias: {self.bias}')
        print(f'Final error: {self.error}')
        print(f'Final prediction: {self.predict(X)}')
        print(f'Final net input: {self.net_input(X)}')
        return self
    def net_input(self,X):
        return np.dot(X,self.weights) + self.bias
    
    def predict(self,X):
        return np.where(self.net_input(X)>=100,1,0)
    
    

In [46]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:100,[0,2]]
y = iris.target[:100]

y = np.where(y==0,0,1) # Convert to binary classification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

ppn = Perceptron(learning_rate=0.01, n_iter=500)
ppn.fit(X_train, y_train)

errors in iteration 1 : 42
errors in iteration 2 : 42
errors in iteration 3 : 42
errors in iteration 4 : 42
errors in iteration 5 : 42
errors in iteration 6 : 42
errors in iteration 7 : 42
errors in iteration 8 : 42
errors in iteration 9 : 42
errors in iteration 10 : 42
errors in iteration 11 : 42
errors in iteration 12 : 42
errors in iteration 13 : 42
errors in iteration 14 : 42
errors in iteration 15 : 42
errors in iteration 16 : 42
errors in iteration 17 : 42
errors in iteration 18 : 42
errors in iteration 19 : 42
errors in iteration 20 : 42
errors in iteration 21 : 42
errors in iteration 22 : 42
errors in iteration 23 : 42
errors in iteration 24 : 42
errors in iteration 25 : 42
errors in iteration 26 : 42
errors in iteration 27 : 42
errors in iteration 28 : 42
errors in iteration 29 : 42
errors in iteration 30 : 42
errors in iteration 31 : 42
errors in iteration 32 : 42
errors in iteration 33 : 42
errors in iteration 34 : 42
errors in iteration 35 : 42
errors in iteration 36 : 42
e

<__main__.Perceptron at 0x12585e6caa0>

In [47]:
y_pred = ppn.predict(X_test)

In [48]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0
Confusion Matrix:
 [[12  0]
 [ 0  8]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

