**Reasoning**:
Import pandas and load the crop recommendation dataset into a DataFrame.



In [2]:
### **1. Calculate Mean, Median, and Mode**


#### **Python Code:**

import numpy as np
from scipy import stats

data = [10, 20, 20, 30, 40, 50, 60, 70, 80, 90]

mean = np.mean(data)
median = np.median(data)
mode = stats.mode(data).mode[0]

print(f"Mean: {mean}, Median: {median}, Mode: {mode}")


IndexError: invalid index to scalar variable.

In [None]:
### **2. Compute Variance and Standard Deviation**
#### **Formulae:**
#- **Variance (σ²)** = \( \frac{\sum (x_i - \bar{x})^2}{n} \)
#- **Standard Deviation (σ)** = \( \sqrt{\text{Variance}} \)

#### **Python Code:**

import numpy as np

data = [12, 15, 14, 10, 18, 20, 25, 30]

variance = np.var(data, ddof=1)  # ddof=1 for sample variance
std_dev = np.sqrt(variance)

print(f"Variance: {variance}, Standard Deviation: {std_dev}")

In [None]:
### **3. Create and Classify a Dataset (Nominal, Ordinal, Interval, Ratio)**
#### **Classification:**
```python
dataset = {
    "Nominal": ["Red", "Blue", "Green", "Yellow"],  # Categorical with no order
    "Ordinal": ["Low", "Medium", "High"],  # Categorical with order
    "Interval": [10, 20, 30, 40],  # Equal intervals, no true zero
    "Ratio": [2.5, 5.0, 7.5, 10.0]  # Equal intervals, true zero
}

for category, values in dataset.items():
    print(f"{category}: {values}")

In [None]:
### **4. Implement Random and Stratified Sampling**
```python
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Creating a sample dataset
data = pd.DataFrame({
    'Category': np.random.choice(['A', 'B', 'C'], 100),
    'Values': np.random.randint(1, 100, 100)
})

# Simple Random Sampling
random_sample = data.sample(n=10)

# Stratified Sampling
stratified_sample = data.groupby('Category', group_keys=False).apply(lambda x: x.sample(3))

print("Random Sample:\n", random_sample)
print("\nStratified Sample:\n", stratified_sample)
```

In [None]:
### **5. Calculate Range**
```python
def calculate_range(data):
    return max(data) - min(data)

data = [3, 7, 2, 9, 10, 15, 20]
print("Range:", calculate_range(data))
```


In [None]:

### **6. Create a Histogram to Visualize Skewness**
```python
import matplotlib.pyplot as plt

data = np.random.normal(10, 5, 1000)  # Generating normal distribution

plt.hist(data, bins=30, edgecolor='black')
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.title("Histogram")
plt.show()
```

In [None]:
### **7. Calculate Skewness and Kurtosis**
```python
from scipy.stats import skew, kurtosis

data = np.random.normal(0, 1, 1000)

print("Skewness:", skew(data))
print("Kurtosis:", kurtosis(data))
```


In [None]:
### **8. Generate Positive and Negative Skewness**
```python
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Positively skewed data
data_pos = np.random.exponential(scale=2, size=1000)

# Negatively skewed data
data_neg = np.random.normal(loc=-5, scale=2, size=1000)

# Plot
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
sns.histplot(data_pos, bins=30, ax=axes[0], kde=True)
sns.histplot(data_neg, bins=30, ax=axes[1], kde=True)

axes[0].set_title("Positively Skewed Data")
axes[1].set_title("Negatively Skewed Data")

plt.show()

In [None]:
### **9. Calculate Covariance**
```python
import numpy as np

x = [1, 2, 3, 4, 5]
y = [2, 3, 5, 7, 11]

cov_matrix = np.cov(x, y, bias=False)
print("Covariance:\n", cov_matrix[0][1])
```

In [None]:

### **10. Calculate Correlation Coefficient**
```python
import numpy as np

x = [1, 2, 3, 4, 5]
y = [2, 3, 5, 7, 11]

correlation = np.corrcoef(x, y)[0, 1]
print("Correlation Coefficient:", correlation)

In [None]:
### **11. Scatter Plot of Two Variables**
```python
import matplotlib.pyplot as plt

x = np.random.randint(1, 100, 50)
y = np.random.randint(1, 100, 50)

plt.scatter(x, y)
plt.xlabel("Variable X")
plt.ylabel("Variable Y")
plt.title("Scatter Plot of Two Variables")
plt.show()
```

In [None]:
### **12. Compare Simple Random and Systematic Sampling**
```python
import numpy as np

data = np.arange(1, 101)

# Simple Random Sampling
random_sample = np.random.choice(data, size=10, replace=False)

# Systematic Sampling (every 10th element)
systematic_sample = data[::10]

print("Random Sample:", random_sample)
print("Systematic Sample:", systematic_sample)
```

In [None]:
### **13. Calculate Mean, Median, and Mode for Grouped Data**
```python
import pandas as pd
from scipy.stats import mode

# Example of grouped data
data = pd.DataFrame({
    "Class Interval": ["1-10", "11-20", "21-30"],
    "Frequency": [5, 15, 10]
})

# Calculate mean approximation using midpoint method
data["Midpoint"] = [5.5, 15.5, 25.5]
mean = sum(data["Midpoint"] * data["Frequency"]) / sum(data["Frequency"])

# Median Class Calculation
cumulative_frequency = data["Frequency"].cumsum()
median_class = data.iloc[(cumulative_frequency >= cumulative_frequency.max()/2).idxmax()]

print("Grouped Mean:", mean)
print("Median Class:", median_class)
```

In [None]:
### **14. Simulate Data and Calculate Central Tendency & Dispersion**
```python
import numpy as np
import pandas as pd

data = np.random.randint(10, 100, 50)

df = pd.DataFrame(data, columns=["Values"])

mean = df["Values"].mean()
median = df["Values"].median()
mode = df["Values"].mode().values[0]
std_dev = df["Values"].std()
variance = df["Values"].var()

print(f"Mean: {mean}, Median: {median}, Mode: {mode}, Std Dev: {std_dev}, Variance: {variance}")

In [2]:

import pandas as pd
import numpy as np

# Create a sample dataset
data = pd.DataFrame({
    "A": np.random.randint(10, 100, 50),
    "B": np.random.randint(20, 150, 50),
    "C": np.random.normal(50, 15, 50)
})

# Summary statistics
print(data.describe())


In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(8,5))
sns.boxplot(data=data)
plt.title("Boxplot of Dataset")
plt.show()


In [2]:
Q1 = data.quantile(0.25)
Q3 = data.quantile(0.75)
IQR = Q3 - Q1

print("Interquartile Range (IQR):\n", IQR)


In [2]:
from scipy.stats import zscore

data_zscore = data.apply(zscore)
print(data_zscore.head())


In [2]:
data1 = np.random.normal(50, 10, 100)
data2 = np.random.normal(50, 20, 100)

std1 = np.std(data1, ddof=1)
std2 = np.std(data2, ddof=1)

print(f"Dataset 1 Standard Deviation: {std1}")
print(f"Dataset 2 Standard Deviation: {std2}")


In [2]:
import seaborn as sns

cov_matrix = data.cov()
plt.figure(figsize=(6,5))
sns.heatmap(cov_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Covariance Heatmap")
plt.show()


In [None]:
corr_matrix = data.corr()
plt.figure(figsize=(6,5))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Matrix")
plt.show()


In [2]:
data_sample = np.random.randint(10, 100, 50)

variance = np.var(data_sample, ddof=1)
std_dev = np.std(data_sample, ddof=1)

print(f"Variance: {variance}, Standard Deviation: {std_dev}")


In [2]:
from scipy.stats import skew, kurtosis
import seaborn as sns

sns.histplot(data["A"], kde=True)
plt.title("Skewness and Kurtosis Visualization")
plt.show()

print("Skewness:", skew(data["A"]))
print("Kurtosis:", kurtosis(data["A"]))


In [None]:
from scipy.stats import pearsonr, spearmanr

x = np.random.randint(1, 100, 50)
y = np.random.randint(1, 100, 50)

pearson_corr, _ = pearsonr(x, y)
spearman_corr, _ = spearmanr(x, y)

print(f"Pearson Correlation: {pearson_corr}")
print(f"Spearman Correlation: {spearman_corr}")
