# Basic Python for Data Science Review
This notebook covers essential Python concepts needed for data science.
We'll review basic Python and key data science libraries.


# 1. Basic Python Review

## 1.1 Variables and Data Types


In [None]:
# Numeric types: one sample value for each built-in numeric class
x = 5         # integer
y = 3.14      # float
z = 2 + 3j    # complex

# Report the class of every sample on a single comma-separated line
print("Types:", ", ".join(str(type(value)) for value in (x, y, z)))

In [None]:
# Strings
text = "Hello, Data Science!"
# Plain literal: an f-prefix is only needed when the string has {placeholders}
print("String operations:")
print(f"Upper: {text.upper()}")
# split(',') cuts at every comma; the space after the comma stays in the
# second piece
print(f"Split: {text.split(',')}")
print(f"Length: {len(text)}")

In [None]:
# Boolean
is_true = True
is_false = False
# Plain literal: an f-prefix is only needed when the string has {placeholders}
print("\nBoolean operations:")
print(f"AND: {is_true and is_false}")  # True only when both operands are true
print(f"OR: {is_true or is_false}")    # True when at least one operand is true
print(f"NOT: {not is_true}")           # logical negation

## 1.2 Data Structures

In [None]:
# Lists
numbers = [1, 2, 3, 4, 5]
# A list may mix element types, including another list
mixed_list = [1, "two", 3.0, [4, 5]]

# Plain literal: an f-prefix is only needed when the string has {placeholders}
print("\nList operations:")
numbers.append(6)  # append() mutates the list in place
print(f"After append: {numbers}")
print(f"Slicing: {numbers[2:4]}")       # indices 2 and 3; the stop index is excluded
print(f"Last element: {numbers[-1]}")   # negative indices count from the end

In [None]:
# Tuples (immutable): a fixed sequence whose items cannot be reassigned
coordinates = (10, 20)
# Unpacking binds each item to a name in order, so x becomes 10 and y becomes 20.
# NOTE: this rebinds the x and y created in the numeric-types cell.
x, y = coordinates  # unpacking
print(f"\nTuple unpacking: x={x}, y={y}")

In [None]:
# Dictionaries
# Values can be of any type; here a string, an integer and a list
person = {
    "name": "John",
    "age": 30,
    "skills": ["Python", "Data Science"]
}

# Plain literal: an f-prefix is only needed when the string has {placeholders}
print("\nDictionary operations:")
# keys() and values() return view objects, which print as dict_keys([...])
# and dict_values([...])
print(f"Keys: {person.keys()}")
print(f"Values: {person.values()}")
print(f"Access value: {person['name']}")

In [None]:
# Sets
# The literal deliberately repeats 2 and 3: a set stores each distinct value
# once, so the duplicates collapse and four elements remain.
unique_numbers = {1, 2, 2, 3, 3, 4}
print(f"\nSet (unique values): {unique_numbers}")

## 1.3 Control Flow

In [None]:
# If / elif / else: exactly one branch runs, selected by the first test that
# holds; the final else catches every remaining case
x = 10
if x < 5:
    print("x is less than 5")
elif x == 5:
    print("x equals 5")
else:
    print("\nx is greater than 5")

In [None]:
# Loops
# A for loop visits each value produced by an iterable; range(3) yields 0, 1, 2.
print("\nFor loop:")
for i in range(3):
    print(f"Iteration {i}")

# A while loop repeats for as long as its condition holds, so the body has to
# move the state towards making that condition false.
print("\nWhile loop:")
count = 0
while count < 3:
    print(f"Count: {count}")
    count += 1  # without this increment the condition would stay true forever

## 1.4 Functions

In [None]:
def calculate_statistics(numbers):
    """
    Calculate basic statistics for a collection of numbers.

    Args:
        numbers (Iterable): Numbers to summarise. Any iterable is accepted,
            including one-shot iterators such as generators.

    Returns:
        tuple: (mean, min, max)

    Raises:
        ValueError: If ``numbers`` contains no values.
    """
    # Materialise once: the input is traversed several times below, and
    # iterators neither support len() nor survive a second pass.
    values = list(numbers)
    if not values:
        # Fail with an explicit message instead of an unexplained
        # ZeroDivisionError from the mean computation.
        raise ValueError("calculate_statistics() requires at least one number")

    mean = sum(values) / len(values)
    return mean, min(values), max(values)

# Using the function: the returned 3-tuple is unpacked straight into names
numbers = list(range(1, 6))
(mean, min_val, max_val) = calculate_statistics(numbers)
print(f"\nStatistics: Mean={mean}, Min={min_val}, Max={max_val}")

## 1.5 List Comprehension

In [None]:
# Traditional loop: start from an empty list and grow it one item per pass
squares = []
for i in range(5):
    squares.append(i * i)

In [None]:
# List comprehension: the same squares built in a single expression
squares_lc = [n * n for n in range(5)]

print(f"\nSquares using list comprehension: {squares_lc}")

In [None]:
# Conditional list comprehension: the trailing `if` keeps only even inputs
even_squares = [n * n for n in range(5) if n % 2 == 0]
print(f"Even squares: {even_squares}")

# 2. NumPy Basics

In [None]:
import numpy as np

## 2.1 Creating Arrays

In [None]:
# From list: build a one-dimensional NumPy array out of a plain Python list
arr1 = np.asarray([1, 2, 3, 4, 5])
print(f"\nNumPy array: {arr1}")

In [None]:
# Special arrays: constant-filled and random arrays, each of length 3
zeros = np.zeros(shape=3)
ones = np.ones(shape=3)
random_arr = np.random.rand(3)

print(f"Zeros: {zeros}", f"Ones: {ones}", f"Random: {random_arr}", sep="\n")

In [None]:
## 2.2 Array Operations

# Second operand for the element-wise examples: the integers 6 through 10
arr2 = np.arange(6, 11)

In [None]:
# Arithmetic
# NumPy applies each operator element by element across the arrays
# Plain literal: an f-prefix is only needed when the string has {placeholders}
print("\nArray arithmetic:")
print(f"Add: {arr1 + arr2}")       # pairs up elements at the same index
print(f"Multiply: {arr1 * 2}")     # the scalar is applied to every element
print(f"Square: {arr1 ** 2}")

In [None]:
# Statistics
# Plain literal: an f-prefix is only needed when the string has {placeholders}
print("\nArray statistics:")
print(f"Mean: {arr1.mean()}")
print(f"Sum: {arr1.sum()}")
# std() uses NumPy's default ddof=0, i.e. the population standard deviation
print(f"Standard deviation: {arr1.std()}")

# 3. Pandas Basics

In [None]:
import pandas as pd

## 3.1 Creating DataFrames

In [None]:
# From dictionary: each key becomes a column and its list supplies the rows
data = {
    'name': ['John', 'Emma', 'Alex'],
    'age': [25, 30, 35],
    'city': ['New York', 'London', 'Paris'],
}

df = pd.DataFrame(data=data)
print("\nPandas DataFrame:", df, sep="\n")

## 3.2 Basic DataFrame Operations


In [None]:
# Accessing columns: indexing the DataFrame with a column label selects
# that single column
print("\nAccessing age column:", df['age'], sep="\n")

In [None]:
# Basic statistics: describe() produces summary statistics for the frame
print("\nDataFrame statistics:", df.describe(), sep="\n")

# Filtering: the comparison yields a boolean mask; only rows where the mask
# is True are kept
print("\nFiltering ages > 30:", df.loc[df['age'] > 30], sep="\n")

# 4. Matplotlib Basics

In [None]:
import matplotlib.pyplot as plt

## 4.1 Basic Plotting

In [None]:
# Line plot
# Start a new figure; figsize is given as (width, height) in inches.
plt.figure(figsize=(10, 5))
# First list holds the x coordinates, second list the matching y values.
plt.plot([1, 2, 3, 4], [1, 4, 2, 3])
# Title and axis labels are attached to the figure opened above.
plt.title('Simple Line Plot')
plt.xlabel('X axis')
plt.ylabel('Y axis')
# Display the finished figure; keep this call last in the cell.
plt.show()

In [None]:
# Scatter plot
# Start a new figure; figsize is given as (width, height) in inches.
plt.figure(figsize=(10, 5))
# Same four points as the line plot, drawn as markers without connecting lines.
plt.scatter([1, 2, 3, 4], [1, 4, 2, 3])
# Title and axis labels are attached to the figure opened above.
plt.title('Simple Scatter Plot')
plt.xlabel('X axis')
plt.ylabel('Y axis')
# Display the finished figure; keep this call last in the cell.
plt.show()

# 5. Exercises

**Exercise 1: List Manipulation**
> Create a list of numbers from 1 to 10 and perform the following:
1. Calculate the sum
2. Find the average
3. Get all even numbers using list comprehension

In [None]:
# Reference solution
numbers = [*range(1, 11)]                      # the integers 1 through 10
sum_numbers = sum(numbers)                     # 1. sum
avg_numbers = sum_numbers / len(numbers)       # 2. average
# 3. even numbers, selected with a list comprehension
even_numbers = [value for value in numbers if value % 2 == 0]

print(
    "\nExercise 1 Results:",
    f"Sum: {sum_numbers}",
    f"Average: {avg_numbers}",
    f"Even numbers: {even_numbers}",
    sep="\n",
)

**Exercise 2: Dictionary Manipulation**
> Create a dictionary of students with their scores and perform the following:
1. Add a new student
2. Update an existing student's score
3. Calculate the average score

In [None]:
# Reference solution
students = {'John': 85, 'Emma': 92, 'Alex': 78}

# 1. Add a new student (a key that is not present yet is inserted)
students.update({'Sarah': 95})

# 2. Update an existing student's score (an existing key is overwritten)
students.update({'John': 88})

# 3. Average score across every student currently in the dictionary
avg_score = sum(students.values()) / len(students)

print(
    "\nExercise 2 Results:",
    f"Updated dictionary: {students}",
    f"Average score: {avg_score}",
    sep="\n",
)

**Exercise 3: NumPy Array Operations**
> Create two NumPy arrays and perform the following:
1. Add them element-wise
2. Multiply them element-wise
3. Calculate the dot product

In [None]:
# Reference solution
arr1 = np.arange(1, 4)   # [1 2 3]
arr2 = np.arange(4, 7)   # [4 5 6]

print(
    "\nExercise 3 Results:",
    f"Element-wise addition: {arr1 + arr2}",
    f"Element-wise multiplication: {arr1 * arr2}",
    f"Dot product: {np.dot(arr1, arr2)}",
    sep="\n",
)

**Exercise 4: Pandas DataFrame Analysis**
> Create a DataFrame with employee information and perform the following:
1. Filter employees above age 30
2. Calculate average salary by department
3. Sort by salary in descending order

In [None]:
# Reference solution
employee_data = {
    'name': ['John', 'Emma', 'Alex', 'Sarah'],
    'age': [25, 35, 45, 30],
    'salary': [50000, 60000, 75000, 65000],
    'department': ['IT', 'HR', 'IT', 'HR'],
}

df = pd.DataFrame(data=employee_data)

print("\nExercise 4 Results:")
# 1. Boolean mask keeps the rows whose age is strictly greater than 30
print("Employees above 30:", df.loc[df['age'] > 30], sep="\n")

# 2. Group the rows by department, then average the salary inside each group
print(
    "\nAverage salary by department:",
    df.groupby(by='department')['salary'].mean(),
    sep="\n",
)

# 3. Highest salary first
print("\nSorted by salary:", df.sort_values(by='salary', ascending=False), sep="\n")

# 6. Additional Practice Problems

> 1. Create a function that takes a list of numbers and returns:
>    - The list sorted in ascending order
>    - The list sorted in descending order
>    - Only the unique values

In [None]:
def process_numbers(numbers):
    """
    Sort a collection of numbers both ways and extract its distinct values.

    Args:
        numbers (Iterable): Values to process. Any iterable is accepted,
            including one-shot iterators such as generators.

    Returns:
        tuple: (asc, desc, unique) where ``asc`` is sorted ascending,
        ``desc`` is sorted descending, and ``unique`` holds each distinct
        value once, in order of first appearance.
    """
    # Materialise once: the input is traversed three times below and an
    # iterator would be exhausted after the first pass.
    values = list(numbers)
    asc = sorted(values)
    desc = sorted(values, reverse=True)
    # dict keys preserve insertion order, so this drops duplicates with a
    # predictable result; set() iteration order is arbitrary.
    unique = list(dict.fromkeys(values))
    return asc, desc, unique

# Sample input containing duplicates (2 and 4 each appear twice)
test_numbers = [4, 2, 7, 2, 9, 4, 1]
(asc, desc, unique) = process_numbers(test_numbers)

print(
    "\nAdditional Practice - Problem 1:",
    f"Ascending: {asc}",
    f"Descending: {desc}",
    f"Unique: {unique}",
    sep="\n",
)

> 2. Create a NumPy array of random numbers and:
>    - Reshape it into a 3x3 matrix
>    - Calculate the inverse (if possible)
>    - Calculate the eigenvalues

In [None]:
# Nine random samples arranged as a 3x3 matrix
random_array = np.random.rand(9)
matrix = random_array.reshape(3, 3)

print("\nAdditional Practice - Problem 2:")
print("Original matrix:")
print(matrix)

# Guard only the inversion: it is the one step that fails for a singular
# matrix, and keeping the try body minimal stops unrelated errors from being
# reported as "not invertible".
try:
    inverse = np.linalg.inv(matrix)
except np.linalg.LinAlgError:
    print("Matrix is not invertible")
else:
    print("\nInverse:")
    print(inverse)

# Eigenvalues are defined for every square matrix, so they are computed
# whether or not the inversion succeeded.
eigenvalues = np.linalg.eigvals(matrix)
print("\nEigenvalues:")
print(eigenvalues)