In [1]:
import pandas as pd

# Toy catalogue: one row per item with its price and a quality label.
df = pd.DataFrame({
    "price": [10, 50, 100],
    "quality": ["Low", "High", "High"],
})

# A "good deal" is a high-quality item priced under 60.
is_cheap = df["price"].lt(60)
is_high_quality = df["quality"].eq("High")
df["is_good_deal"] = is_cheap & is_high_quality

print(df)

   price quality  is_good_deal
0     10     Low         False
1     50    High          True
2    100    High         False


In [2]:
# Share of the samples (in percent) that goes to the training partition.
TRAIN_PERCENT = 80

data = range(100)

# Derive the cut point from the dataset size instead of hard-coding it, so the
# ratio still holds if `data` changes. Integer arithmetic avoids float rounding.
split_index = len(data) * TRAIN_PERCENT // 100
train = data[:split_index]
test = data[split_index:]

# Every sample must land in exactly one of the two partitions.
assert len(train) + len(test) == len(data)

In [3]:
words = ['apple', 'banana', 'apple', 'cherry', 'apple']

# Tally how many times each word appears; keys keep first-seen order.
word_count = {}
for word in words:
    if word in word_count:
        word_count[word] += 1
    else:
        word_count[word] = 1

print(word_count)

{'apple': 3, 'banana': 1, 'cherry': 1}


In [4]:
prices = ['$100', '$200', '$300']

# Remove every dollar sign from each label and convert what is left to an int.
clean_prices = list(map(lambda label: int(label.replace('$', '')), prices))
print(clean_prices)

[100, 200, 300]


In [5]:
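# Template only (kept commented out): `features` and `target` are placeholders
# that are not defined in the cells above.
# from sklearn.linear_model import LinearRegression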
# from sklearn.model_selection import train_test_split

# X = df[features]
# y = df[target]

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# model = LinearRegression()
# model.fit(X_train, y_train)


In [6]:
# Average price within each quality label (relies on `df` from the first cell).
df['price'].groupby(df['quality']).mean()

quality
High    75.0
Low     10.0
Name: price, dtype: float64

In [7]:
y_true = [10, 20]
y_pred = [12, 18]

# Mean squared error: the average of the squared residuals.
squared_errors = []
for actual, predicted in zip(y_true, y_pred):
    squared_errors.append((actual - predicted) ** 2)

mse = sum(squared_errors) / len(squared_errors)

# Equivalent one-liner:
# mse = sum((t - p)**2 for t, p in zip(y_true, y_pred)) / len(y_true)

print(mse)

4.0


In [8]:
accuracy = 0.987654321

# The "%" presentation type multiplies by 100 and appends the percent sign.
print(f"{accuracy:.2%}")

98.77%


In [9]:
def safe_divide(a, b, default=0.0):
    """Divide ``a`` by ``b``, falling back to ``default`` on division by zero.

    Args:
        a: Numerator.
        b: Denominator.
        default: Value returned when ``a / b`` raises ``ZeroDivisionError``.
            Defaults to ``0.0``. Pass a sentinel such as ``None`` or
            ``float("nan")`` when a real zero result must be told apart
            from the fallback.

    Returns:
        ``a / b``, or ``default`` if the division raises ``ZeroDivisionError``.

    Note:
        A later cell in this notebook defines another ``safe_divide`` that
        returns strings. Whichever cell ran last is the active definition.
    """
    try:
        return a / b
    except ZeroDivisionError:
        return default

print(safe_divide(10,0))

0.0


In [10]:
# Print the successive doublings of 1 that do not exceed 50.
n = 1
while True:
    if n > 50:
        break
    print(n)
    n = n * 2

1
2
4
8
16
32


In [11]:
scores = [0.72, 0.8532, 0.91, 0.685, 0.88, 0.79, 0.945]

# Keep only the scores above 0.80 and render each as a one-decimal percentage.
scores_as_percent = []
for score in scores:
    if score > 0.80:
        scores_as_percent.append("{:.1f}%".format(score * 100))
print(scores_as_percent)

['85.3%', '91.0%', '88.0%', '94.5%']


In [12]:
cities = ["Austin", "Denver", "Tampa", "Boise"]
prices = [425000, 510000, 340000, 395000]
sqft = [1800, 2100, 1650, 1900]

# Price per square foot for each listing, rounded to two decimals.
price_per_sqft = [round(price / area, 2) for price, area in zip(prices, sqft)]

# Map each city to its price per square foot (the lists are positionally aligned).
market_data = {city: rate for city, rate in zip(cities, price_per_sqft)}

print(market_data)

{'Austin': 236.11, 'Denver': 242.86, 'Tampa': 206.06, 'Boise': 207.89}


In [13]:
def safe_divide(a, b):
    """Return ``a / b`` formatted to two decimals, or an error message.

    Returns:
        The quotient as a string with two decimal places. If the division or
        the formatting raises ``ZeroDivisionError`` the result is
        "Cannot divide by zero"; if it raises ``TypeError`` the result is
        "Invalid input type".
    """
    try:
        # Formatting stays inside the try so a TypeError from either step is caught.
        quotient = a / b
        formatted = format(quotient, ".2f")
    except ZeroDivisionError:
        return "Cannot divide by zero"
    except TypeError:
        return "Invalid input type"
    return formatted

print(safe_divide(10, 3))
print(safe_divide(5, 0))
print(safe_divide("a", 2))

3.33
Cannot divide by zero
Invalid input type


In [14]:
neighborhoods = pd.DataFrame({
    "neighborhood": ["Downtown", "Eastside", "Westlake", "Southpark", "Northgate", "Midtown"],
    "median_rent": [2100, 1450, 1875, 1200, 1650, 1950],
    "vacancy_rate": [5.2, 9.1, 6.8, 11.3, 7.5, 4.9],
    "units_available": [45, 78, 32, 95, 58, 28]
})

# The three highest-rent neighborhoods among those with a vacancy rate under 8.
(
    neighborhoods
    .loc[lambda frame: frame["vacancy_rate"] < 8]
    .sort_values("median_rent", ascending=False)
    .loc[:, ["neighborhood", "median_rent"]]
    .head(3)
)

Unnamed: 0,neighborhood,median_rent
0,Downtown,2100
5,Midtown,1950
2,Westlake,1875


In [15]:
student_scores = {
    "Alice": 0.945,
    "Bob": 0.88,
    "Carol": 0.912,
    "Dan": 0.867
}


def ordinal_suffix(rank):
    """Return the English ordinal suffix for a positive integer rank.

    Args:
        rank: Position in the ranking (1, 2, 3, ...).

    Returns:
        "st", "nd", "rd" or "th", so that 1 -> "st", 22 -> "nd", 113 -> "th".
    """
    # 11, 12 and 13 (also 111, 112, ...) take "th" despite ending in 1, 2 or 3.
    if 11 <= rank % 100 <= 13:
        return "th"
    # Otherwise the last digit decides, so 21 -> "st" rather than "th".
    return {1: "st", 2: "nd", 3: "rd"}.get(rank % 10, "th")


# Highest score first.
ranked = sorted(student_scores.items(), key=lambda item: item[1], reverse=True)

for i, (student, score) in enumerate(ranked, start=1):
    suffix = ordinal_suffix(i)
    print(f"{i}{suffix}: {student} - {score:.1%}")

1st: Alice - 94.5%
2nd: Carol - 91.2%
3rd: Bob - 88.0%
4th: Dan - 86.7%


In [16]:
deals = pd.DataFrame({
    "region": ["North", "South", "North", "South", "North", "South", "North"],
    "deal_size": [15000, 22000, 18000, 9500, 31000, 17500, 12000],
    "stage": ["Won", "Won", "Lost", "Won", "Won", "Lost", "Won"]
})

# Mean and total deal size per region. Selecting with a column list keeps the
# two-level (column, statistic) header on the result.
result = deals.groupby("region")[["deal_size"]].agg(["mean", "sum"])
print(result)

           deal_size       
                mean    sum
region                     
North   19000.000000  76000
South   16333.333333  49000


In [17]:
import numpy as np

clients = pd.DataFrame({
    "company": ["Acme", "Bolt", "Core", "Dash", "Edge", "Flux"],
    "revenue": [75000, 18000, 42000, 55000, 12000, 31000]
})

# Revenue floor for each tier, highest first. np.select takes the first
# condition that matches, so the order of these rules matters.
tier_rules = [
    (50000, "Enterprise"),
    (20000, "Mid-Market"),
]

conditions = [clients["revenue"].ge(floor) for floor, _ in tier_rules]
choices = [label for _, label in tier_rules]

# Rows that meet no floor fall through to the default tier.
clients["tier"] = np.select(conditions, choices, default="SMB")

print(clients["tier"].value_counts())



tier
Enterprise    2
SMB           2
Mid-Market    2
Name: count, dtype: int64


In [18]:
paired = [("Llama", 8), ("GPT-4", 17000), ("Claude", 175), ("Gemini", 400)]

# Split the (name, value) pairs into two parallel tuples.
names = tuple(name for name, _ in paired)
vals = tuple(value for _, value in paired)

print(' | '.join(names))

Llama | GPT-4 | Claude | Gemini


In [19]:
def get_nested(mapping, keys, default=None):
    """Follow ``keys`` through nested dicts and return the value found there.

    Args:
        mapping: Outermost dictionary.
        keys: Sequence of keys, outermost first.
        default: Value returned when the path cannot be followed.

    Returns:
        The value at the end of the path, or ``default`` if any key is missing
        or an intermediate value is not a dict (e.g. ``None``). A chained
        ``.get(...).get(...)`` would raise ``AttributeError`` in the latter case.
    """
    current = mapping
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current


# Path to the learning rate inside a config dictionary.
LEARNING_RATE_PATH = ("model", "hyperparams", "learning_rate")

config = {
    "model": {
        "architecture": "transformer",
        "hyperparams": {
            "learning_rate": 0.001,
            "batch_size": 32
        }
    }
}

print(get_nested(config, LEARNING_RATE_PATH, "not set"))

# Same lookup against a config that has no "hyperparams" section.
config_broken = {
    "model": {
        "architecture": "transformer"
    }
}

print(get_nested(config_broken, LEARNING_RATE_PATH, "not set"))

0.001
not set
