# Azure ML Sample Notebook

This notebook demonstrates basic Azure ML operations and data analysis.

In [None]:
# Import required libraries
# Tabular data handling and numerical routines
import pandas as pd
import numpy as np
# Plotting backend used by the visualization cells
import matplotlib.pyplot as plt
# Azure ML SDK entry points; Workspace is used by the connection cell below
# NOTE(review): Dataset and ServicePrincipalAuthentication are not referenced
# by any cell visible in this notebook — confirm whether they are still needed.
from azureml.core import Workspace, Dataset
from azureml.core.authentication import ServicePrincipalAuthentication

# Only reached when every import above succeeded
print("Libraries imported successfully!")

In [None]:
# Connect to Azure ML Workspace
# Reset the handle before trying, so a failed attempt never leaves `ws`
# unbound (NameError in later cells) or pointing at a workspace obtained by an
# earlier run of this cell. Downstream code can test `ws is None`.
ws = None
try:
    # Try to get workspace from config
    ws = Workspace.from_config()
    print(f"Connected to workspace: {ws.name}")
    print(f"Resource group: {ws.resource_group}")
    print(f"Location: {ws.location}")
except Exception as e:
    # Deliberately best-effort: the rest of the notebook does not need a
    # workspace, so report the problem and carry on instead of aborting.
    print(f"Could not connect to workspace: {e}")
    print("This is expected in test environment")

In [None]:
# Create sample data for analysis
# Seed the legacy global NumPy RNG so every run produces the same frame.
np.random.seed(42)

# The columns are drawn in this exact order (keyword arguments are evaluated
# left to right); changing the order would change the generated values.
data = dict(
    feature1=np.random.normal(0, 1, 1000),
    feature2=np.random.normal(2, 1.5, 1000),
    feature3=np.random.exponential(1, 1000),
    target=np.random.choice([0, 1], 1000, p=[0.6, 0.4]),
)

df = pd.DataFrame(data)

# Show a preview followed by the overall dimensions.
print("Sample data created:", df.head(), sep="\n")
print(f"\nDataset shape: {df.shape}")

In [None]:
# Basic data analysis
# Each entry pairs a heading with the table printed beneath it: summary
# statistics, class counts of the target, and pairwise column correlations.
for heading, table in (
    ("Dataset Statistics:", df.describe()),
    ("\nTarget distribution:", df['target'].value_counts()),
    ("\nCorrelation matrix:", df.corr()),
):
    print(heading)
    print(table)

In [None]:
# Create visualizations
# 2x2 grid: one histogram per feature plus a bar chart of the target classes.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Feature distributions
# Series.hist() forwards extra keyword arguments to matplotlib's Axes.hist,
# which does not accept `title` and raises; set the title on the Axes instead.
df['feature1'].hist(bins=30, ax=axes[0,0])
axes[0,0].set_title('Feature 1 Distribution')
df['feature2'].hist(bins=30, ax=axes[0,1])
axes[0,1].set_title('Feature 2 Distribution')
df['feature3'].hist(bins=30, ax=axes[1,0])
axes[1,0].set_title('Feature 3 Distribution')

# Target distribution
# Series.plot() does take `title` directly, so no separate call is needed here.
df['target'].value_counts().plot(kind='bar', ax=axes[1,1], title='Target Distribution')

plt.tight_layout()
plt.show()

print("Visualizations created successfully!")

In [None]:
# Simple machine learning model
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Feature matrix (the three synthetic features) and the binary label column
X, y = df[['feature1', 'feature2', 'feature3']], df['target']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build and fit the forest in one step (fit() returns the estimator itself)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Report overall accuracy followed by the per-class metrics
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

In [None]:
# Feature importance
# Pair each feature column with the importance reported by the fitted model,
# most important first.
feature_importance = (
    pd.DataFrame({'feature': X.columns, 'importance': model.feature_importances_})
    .sort_values(by='importance', ascending=False)
)

print("Feature Importance:", feature_importance, sep="\n")

# Plot feature importance as a bar chart on a dedicated figure
importance_fig, importance_ax = plt.subplots(figsize=(8, 6))
importance_ax.bar(feature_importance['feature'], feature_importance['importance'])
importance_ax.set_title('Feature Importance')
importance_ax.set_xlabel('Features')
importance_ax.set_ylabel('Importance')
# Rotate the feature-name tick labels
plt.setp(importance_ax.get_xticklabels(), rotation=45)
importance_fig.tight_layout()
plt.show()

## Summary

This notebook demonstrated:
1. Azure ML workspace connection
2. Data creation and analysis
3. Data visualization
4. Machine learning model training
5. Model evaluation and feature importance

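If every cell above ran without errors, all operations completed successfully! (The workspace connection step is skipped with a message when no Azure ML configuration is available.)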