**Module 1**

In [None]:
# Example: the four analytics types, each mapped to the question it answers
analytics_types = {
    "Descriptive": "Understanding what happened",
    "Diagnostic": "Understanding why it happened",
    "Predictive": "Predicting what could happen",
    "Prescriptive": "Suggesting what should be done"
}

# One "<type>: <description>" line per entry, in insertion order.
print("\n".join(f"{name}: {meaning}" for name, meaning in analytics_types.items()))


In [None]:
# Example: contrasting traditional analytics with AI-driven analytics
traditional = "Relies on predefined rules and algorithms."
ai = "Uses machine learning and adaptive algorithms."

# Print each approach next to its one-line characterisation.
for heading, summary in (("Traditional Analytics:", traditional), ("AI Analytics:", ai)):
    print(heading, summary)


In [None]:
# Stages of the analytics lifecycle, listed in the order they are carried out
lifecycle_stages = [
    "Problem Definition",
    "Data Collection",
    "Data Cleaning",
    "Data Analysis",
    "Data Interpretation",
    "Reporting and Decision Making"
]

# One stage per output line.
print(*lifecycle_stages, sep="\n")


**Module 2**

In [None]:
# Example: data sources grouped by whether they live inside or outside the organisation
internal_sources = ["Sales Database", "Customer Database"]
external_sources = ["Public APIs", "Social Media Data"]

# Show each group under its own heading.
for heading, sources in (
    ("Internal Data Sources:", internal_sources),
    ("External Data Sources:", external_sources),
):
    print(heading, sources)


In [None]:
import pandas as pd

# Example: Handling Missing Values
# The `None` entries become NaN once the dict is loaded into a DataFrame.
data = {'A': [1, 2, None], 'B': [4, None, 6]}
raw_frame = pd.DataFrame(data)

# Fill missing values: each NaN is replaced by the mean of its own column
# (the mean itself is computed over the non-missing entries only).
column_means = raw_frame.mean()
df = raw_frame.fillna(column_means)
print("Cleaned Data:\n", df)


In [None]:
# Example: Data Normalization
from sklearn.preprocessing import MinMaxScaler

# Learn each column's minimum and maximum from `df` (defined by an earlier cell),
# then rescale the same frame with the fitted scaler.
scaler = MinMaxScaler().fit(df)
normalized_data = scaler.transform(df)

print("Normalized Data:\n", normalized_data)


In [None]:
# Example: Using AI for data preprocessing
from sklearn.impute import SimpleImputer

# Using SimpleImputer to handle missing values.
# The `df` left behind by the earlier missing-value cell has already had its
# NaNs filled, so imputing it again would change nothing. Build a frame that
# still contains the missing entries so the imputer has real work to do.
raw_df = pd.DataFrame({'A': [1, 2, None], 'B': [4, None, 6]})

# strategy='mean' replaces each NaN with the mean of the observed values in its column.
imputer = SimpleImputer(strategy='mean')
imputed_data = imputer.fit_transform(raw_df)

print("Imputed Data:\n", imputed_data)


**Module 3**

In [None]:
# Example: Basic statistics
data = [1, 2, 3, 4, 5]

# Arithmetic mean: total of the observations divided by how many there are.
total, count = sum(data), len(data)
mean = total / count
print("Mean:", mean)


In [None]:
import matplotlib.pyplot as plt

# Example: Creating a histogram
# Draw on an explicit Axes object rather than through pyplot's implicit current figure.
fig, ax = plt.subplots()
ax.hist(data, bins=5)
ax.set_title("Histogram")
ax.set_xlabel("Values")
ax.set_ylabel("Frequency")
plt.show()


In [None]:
# Example: Automated EDA using pandas-profiling
# NOTE(review): the pandas-profiling project has been renamed ydata-profiling; on a
# recent environment this import may fail — confirm which package is installed.
from pandas_profiling import ProfileReport

# `pd` and `data` are not defined in this cell; they come from earlier cells.
# NOTE(review): if `data` is still the flat list from the basic-statistics cell,
# the frame has a single column labelled 0 — confirm that is the intended input.
df = pd.DataFrame(data)
# Build the profiling report for `df` and write it out as an HTML file.
profile = ProfileReport(df)
profile.to_file("eda_report.html")


**Module 4**

In [None]:
# Example: Summary statistics using pandas
# `df` is not created in this cell; it is whichever DataFrame an earlier cell last assigned.
# describe() reports count, mean, std, min, quartiles and max for the numeric columns.
summary_stats = df.describe()
print("Summary Statistics:\n", summary_stats)


In [None]:
# Example: Checking data distribution
import seaborn as sns

# Histogram of `data` (from an earlier cell) with a kernel-density curve drawn on top.
# histplot returns the Axes it drew on, so the title is set on that object directly.
ax = sns.histplot(data, kde=True)
ax.set_title("Data Distribution with KDE")
plt.show()


**Module 5**

In [None]:
# Example: Detecting anomalies using Z-score
from scipy import stats

# `df` is not created in this cell; it comes from an earlier cell.
# zscore standardises the values: subtract the mean, divide by the standard deviation.
# NOTE(review): this cell only prints the scores; no threshold is applied to flag anomalies.
z_scores = stats.zscore(df)
print("Z-scores:\n", z_scores)


In [None]:
# Example: A/B Testing
import scipy.stats as stats

# Observations collected for each variant of the experiment.
group_a = [20, 30, 50]
group_b = [30, 20, 40]

# Independent two-sample t-test on the group means; read the two fields off
# the result object by name instead of unpacking it positionally.
ttest_result = stats.ttest_ind(group_a, group_b)
t_stat, p_value = ttest_result.statistic, ttest_result.pvalue
print("T-statistic:", t_stat, "P-value:", p_value)


**Module 6**

In [None]:
# Example: Simple linear regression
from sklearn.linear_model import LinearRegression

# Training data: one feature per sample, with a target equal to the feature.
X = [[1], [2], [3]]
y = [1, 2, 3]

# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X, y)

# Query the fitted line at an input that was not in the training set.
new_input = [[4]]
predictions = model.predict(new_input)
print("Prediction for input 4:", predictions)


In [None]:
# Example: Evaluating model using confusion matrix
from sklearn.metrics import confusion_matrix

# Ground-truth labels and the labels the model produced, aligned by position.
y_true = [0, 1, 1, 0]
y_pred = [0, 0, 1, 1]

# Rows of the matrix are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
print("Confusion Matrix:\n", cm)


**Module 7**

In [None]:
# Example: Linear programming with scipy
from scipy.optimize import linprog

# Minimize c @ x subject to A @ x <= b (linprog's default bounds also keep x >= 0)
c = [1, 2]
A = [
    [-1, -1],  # -x0 - x1 <= -1, i.e. x0 + x1 >= 1
    [1, 2],    #  x0 + 2*x1 <= 6
]
b = [-1, 6]

result = linprog(c=c, A_ub=A, b_ub=b)
optimal_value, optimal_point = result.fun, result.x
print("Optimal value:", optimal_value, "at x:", optimal_point)


**Module 8**

In [None]:
# Example: languages commonly used for data analytics
languages = ["Python", "R", "SQL"]
heading = "Programming Languages for Data Analytics:"
print(heading, languages)


**Module 9**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Sample Data
# NOTE(review): assumes finance_data.csv holds model-ready feature columns plus a
# 'Risk_Label' target column — confirm against the actual file.
data = pd.read_csv('finance_data.csv')  # Load your finance data
X = data.drop('Risk_Label', axis=1)
y = data['Risk_Label']

# Train-Test Split
# Hold out 20% of the rows; a fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training
# The forest is randomised as well, so it is seeded for reproducible trees.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Load healthcare data
# NOTE(review): assumes healthcare_data.csv holds model-ready feature columns plus a
# 'Disease' target column — confirm against the actual file.
data = pd.read_csv('healthcare_data.csv')
X = data.drop('Disease', axis=1)
y = data['Disease']

# Train-Test Split
# Hold out 20% of the rows; a fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training
# NOTE(review): the features are passed to the SVM unscaled — confirm that is intended.
model = SVC()
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)


In [None]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load marketing data
data = pd.read_csv('marketing_data.csv')
X = data[['Age', 'Income']]

# K-Means Clustering
# random_state fixes the centroid initialisation so the cluster labels are the
# same on every run; n_init is spelled out because its default has changed
# between scikit-learn releases.
# NOTE(review): Age and Income are clustered unscaled, so whichever column has
# the larger numeric range dominates the distances — consider scaling first.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data['Cluster'] = kmeans.fit_predict(X)

# Plotting: one point per customer, coloured by assigned cluster.
plt.scatter(data['Age'], data['Income'], c=data['Cluster'])
plt.xlabel('Age')
plt.ylabel('Income')
plt.title('Customer Segmentation')
plt.show()


In [None]:
import pandas as pd
from scipy.optimize import linprog

# Sample Supply Chain Data
# Minimise the total cost 2*x0 + 3*x1 subject to A @ x <= b.
c = [2, 3]  # Cost coefficients
# linprog only accepts "<=" rows, so every ">=" constraint is written negated:
#   x0 + x1 >= 5  ->  -x0 - x1 <= -5
#   x0 >= 0       ->  -x0      <=  0
#   x1 >= 0       ->       -x1 <=  0
# (Rows [1, 0] and [0, 1] would instead force x0 <= 0 and x1 <= 0, which makes
# the problem infeasible and leaves the solver with no solution to report.)
A = [[-1, -1], [-1, 0], [0, -1]]  # Inequality coefficients
b = [-5, 0, 0]  # Right-hand side

# Linear Programming
result = linprog(c, A_ub=A, b_ub=b)
if not result.success:
    # Surface solver failures instead of printing a meaningless "solution".
    raise RuntimeError(f"Linear program could not be solved: {result.message}")
print('Optimal Solution:', result.x)


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load data
# NOTE(review): assumes maintenance_data.csv holds model-ready feature columns plus a
# 'Failure' target column — confirm against the actual file.
data = pd.read_csv('maintenance_data.csv')
X = data.drop('Failure', axis=1)
y = data['Failure']

# Train-Test Split
# Hold out 20% of the rows; a fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training
# The forest is randomised as well, so it is seeded for reproducible trees.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)


In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest

# Load transaction data
data = pd.read_csv('transaction_data.csv')

# Model for fraud detection
# contamination=0.01 sets the expected share of outliers to 1% of the rows.
# random_state pins the random tree construction so the same rows are flagged
# on every run.
# NOTE(review): the model is fitted on every column of the file; this assumes
# all of them are numeric — confirm against the actual data.
model = IsolationForest(contamination=0.01, random_state=42)
data['Fraud'] = model.fit_predict(data)

# Fraud transactions
# fit_predict labels outliers -1 and inliers 1; keep only the outliers.
fraud_transactions = data[data['Fraud'] == -1]


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
# train_test_split is used below; import it here so the cell does not depend on
# an earlier cell having brought it into the kernel namespace.
from sklearn.model_selection import train_test_split

# Load customer data
# NOTE(review): assumes customer_data.csv holds model-ready feature columns plus a
# 'Purchase_Amount' target column — confirm against the actual file.
data = pd.read_csv('customer_data.csv')
X = data.drop('Purchase_Amount', axis=1)
y = data['Purchase_Amount']

# Train-Test Split
# Hold out 20% of the rows; a fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training
# The forest is randomised as well, so it is seeded for reproducible trees.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)


**Module 10**

In [None]:
import hashlib

import pandas as pd
import numpy as np


def _mask_email(address):
    """Return a masked stand-in for `address`.

    The text before the first '@' (the part that identifies the person) is
    replaced by the first 12 hex characters of its SHA-256 digest, and the
    domain is replaced by the placeholder 'domain.com'. The same input always
    produces the same output. A value with no '@' is hashed as a whole.
    """
    local_part = address.split('@')[0]
    digest = hashlib.sha256(local_part.encode('utf-8')).hexdigest()[:12]
    return digest + '@domain.com'


# Sample data
data = {'name': ['Alice', 'Bob', 'Charlie'], 'email': ['alice@example.com', 'bob@example.com', 'charlie@example.com']}
df = pd.DataFrame(data)

# Anonymizing email addresses
# Only swapping the domain would leave the identifying local part (e.g. "alice")
# in clear text, so the local part is replaced with a digest as well.
# NOTE(review): an unsalted hash is pseudonymisation, not full anonymisation, and
# the 'name' column is left untouched — tighten both before using on real data.
df['email'] = df['email'].map(_mask_email)
print(df)


In [None]:
import json

# Sample user record, including nested consent preferences
user_data = {
    'name': 'Alice',
    'email': 'alice@example.com',
    'preferences': {'newsletters': True, 'ads': False}
}


# Function to simulate a data access request
def request_user_data(user_data) -> str:
    """Return `user_data` serialised as JSON text, indented by two spaces."""
    serialized = json.dumps(user_data, indent=2)
    return serialized


print(request_user_data(user_data))


In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix

# Sample true labels and the corresponding model predictions, aligned by position
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 0, 1, 0, 1]

# Confusion matrix to analyze bias: rows are true classes, columns are
# predicted classes, so off-diagonal cells show which way the errors lean.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
print(f"Confusion Matrix:\n{cm}")


In [None]:
from jsonschema import validate, ValidationError

# Schema for ethical data usage: the record must be an object with a
# 'data_usage' string drawn from the approved purposes.
schema = {
    "type": "object",
    "properties": {
        "data_usage": {"type": "string", "enum": ["marketing", "research", "internal"]}
    },
    "required": ["data_usage"]
}

# Record describing how the data is about to be used
data_usage_info = {"data_usage": "marketing"}

# Validate usage: report the validator's message on failure, confirm otherwise.
try:
    validate(instance=data_usage_info, schema=schema)
except ValidationError as e:
    print("Data usage policy violation:", e.message)
else:
    print("Data usage policy is followed.")


In [None]:
import csv
from datetime import datetime, timezone


# Function to log actions
def log_action(action, log_path='audit_log.csv'):
    """Append one audit record to a CSV log file.

    Args:
        action: Description of the event being recorded.
        log_path: CSV file to append to; it is created if it does not exist.
            Defaults to 'audit_log.csv' in the current working directory.

    Each row is ``[timestamp, action]``. The timestamp is timezone-aware UTC in
    ISO 8601 form, so entries stay unambiguous and sortable regardless of the
    machine's local time zone or daylight-saving changes.
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    # newline='' lets the csv module control line endings itself; an explicit
    # encoding keeps the file identical across platforms.
    with open(log_path, mode='a', newline='', encoding='utf-8') as file:
        csv.writer(file).writerow([timestamp, action])


# Example action
log_action("User data accessed.")


# Ethical AI Guidelines
1. **Fairness**: Ensure algorithms do not discriminate based on race, gender, or other protected attributes.
2. **Transparency**: Provide clear explanations for AI decisions.
3. **Accountability**: Establish clear responsibilities for AI outputs.
4. **Privacy**: Ensure data is handled in compliance with GDPR and CCPA.


In [None]:
import pandas as pd
import numpy as np

# Sample biased dataset
data = {'gender': ['male', 'female', 'female', 'male', 'female'], 'score': [1, 2, 3, 4, 2]}
df = pd.DataFrame(data)

# Mitigating bias by balancing classes
# Downsample every gender group to the size of the smallest one. The target
# size is computed once up front, and GroupBy.sample draws the rows directly,
# which keeps the 'gender' column and the original row index in the result.
smallest_group = int(df['gender'].value_counts().min())
df_balanced = df.groupby('gender').sample(n=smallest_group, random_state=1)
print(df_balanced)


In [None]:
import pandas as pd

# Sample AI model performance metrics: one row per model
metrics = {
    'model': ['Model A', 'Model B'],
    'accuracy': [0.95, 0.90],
    'fairness_score': [0.85, 0.80]
}
df_metrics = pd.DataFrame(metrics)

# Analyzing performance: keep the models whose fairness score is strictly
# below 0.85 (a score of exactly 0.85 is not selected).
below_threshold = df_metrics['fairness_score'].lt(0.85)
ethical_practices = df_metrics.loc[below_threshold]
print("Models needing improvement on fairness:", ethical_practices)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Sample data report: one row per metric
data = {'metric': ['accuracy', 'precision', 'recall'], 'value': [0.95, 0.90, 0.85]}
df_report = pd.DataFrame(data)

# Generating a bar plot
# DataFrame.plot returns the Axes it drew on; label it and save its figure
# through that object. The file is written before show() is called.
ax = df_report.set_index('metric').plot.bar()
ax.set_title('Model Performance Metrics')
ax.set_ylabel('Score')
ax.get_figure().savefig('model_performance_report.png')
plt.show()
