In [7]:
# Question 1
# Count the 'Very Good' cut diamonds in diamonds.csv and report their
# average carat weight and average price.
import csv

# Step 1: Load the CSV file
# newline="" lets the csv module deal with line endings itself.
with open("diamonds.csv", "r", newline="") as file:
    lines = file.readlines()

# Step 2: Extract the header and the data rows
# csv.reader is used instead of str.split(",") so quoted fields (and fields
# that contain a comma) are parsed correctly. Fields are stripped of
# surrounding whitespace, and rows with no content (blank lines) are dropped
# so they cannot trigger an IndexError during filtering.
rows = [[field.strip() for field in row] for row in csv.reader(lines)]
rows = [row for row in rows if any(row)]
if not rows:
    raise ValueError("diamonds.csv contains no header row")

header = rows[0]  # Column names
data = rows[1:]  # Every row after the header

# Indexes for relevant columns
cut_index = header.index("cut")
carat_index = header.index("carat")
price_index = header.index("price")

# Step 3: Filter diamonds with 'Very Good' cut
very_good_diamonds = [row for row in data if row[cut_index] == "Very Good"]

# Step 4: Count entries
total_entries = len(very_good_diamonds)
if total_entries == 0:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError("No diamonds with cut 'Very Good' found in diamonds.csv")

# Step 5: Compute average carat weight
total_carat = sum(float(row[carat_index]) for row in very_good_diamonds)
average_carat = total_carat / total_entries

# Step 6: Compute average price
total_price = sum(float(row[price_index]) for row in very_good_diamonds)
average_price = total_price / total_entries

# Output the results
print(f"Total number of entries: {total_entries}")
print(f"Average weight (in carats): {average_carat:.4f}")
print(f"Average price (in US dollars): {average_price:.4f}")


Total number of entries: 12082
Average weight (in carats): 0.8064
Average price (in US dollars): 3981.7599


In [6]:
# Question 2
# Compare two ways of averaging price per carat over the 'Very Good' diamonds
# and report the max, min and median of the per-diamond price per carat.
# Relies on very_good_diamonds, carat_index and price_index from the
# Question 1 cell.
import statistics

# Convert relevant columns to floats
carat_weights = [float(row[carat_index]) for row in very_good_diamonds]
prices = [float(row[price_index]) for row in very_good_diamonds]

N = len(very_good_diamonds)
if N == 0:
    # Every statistic below is undefined for an empty selection.
    raise ValueError("No diamonds selected; cannot compute price-per-carat statistics")

# Per-diamond price per carat, computed once and reused by every step below.
price_per_carat = [price / weight for price, weight in zip(prices, carat_weights)]

# Step 1: Compute average prices per carat
# Method "a": mean of the individual price/carat ratios
sum_price_per_carat = sum(price_per_carat)
average_a = sum_price_per_carat / N

# Method "b": total price divided by total carat weight
sum_prices = sum(prices)
sum_weights = sum(carat_weights)
average_b = sum_prices / sum_weights

# Step 2: Find the max, min, and median price per carat
max_price_per_carat = max(price_per_carat)
min_price_per_carat = min(price_per_carat)
# statistics.median averages the two middle values for an even count and
# returns the middle value for an odd count.
median_price_per_carat = statistics.median(price_per_carat)

# Output the results
print(f"Average price per carat using method 'a': {average_a:.4f}")
print(f"Average price per carat using method 'b': {average_b:.4f}")
print(f"Maximum price per carat: {max_price_per_carat:.4f}")
print(f"Minimum price per carat: {min_price_per_carat:.4f}")
print(f"Median price per carat: {median_price_per_carat:.4f}")

# Step 3: Compare the two averages
if average_a < average_b:
    print("Method 'a' gives a lower average price per carat.")
elif average_b < average_a:
    print("Method 'b' gives a lower average price per carat.")
else:
    # Equal averages: neither method is lower.
    print("Both methods give the same average price per carat.")


Average price per carat using method 'a': 4014.1284
Average price per carat using method 'b': 4937.8122
Maximum price per carat: 17828.8462
Minimum price per carat: 1138.7097
Median price per carat: 3605.8258
Method 'a' gives a lower average price per carat.


In [14]:
# Question 3
# Report colour, clarity, depth and table for the 'Very Good' diamonds with
# the highest and the lowest price per carat.
# Relies on header, price_index, carat_index and very_good_diamonds from the
# Question 1 cell.
color_index = header.index("color")
clarity_index = header.index("clarity")
depth_index = header.index("depth")
table_index = header.index("table")

# Pair every row with its price per carat.
# Note: this rebinds price_per_carat (a plain list of floats in the
# Question 2 cell) to a list of (ratio, row) tuples.
price_per_carat = []
for row in very_good_diamonds:
    ratio = float(row[price_index]) / float(row[carat_index])
    price_per_carat.append((ratio, row))


def _ratio_of(entry):
    """Return the price-per-carat component of a (ratio, row) pair."""
    return entry[0]


def _describe(ratio, diamond):
    """Build the summary dict printed for one diamond."""
    return {
        "price_per_carat": ratio,
        "color": diamond[color_index],
        "clarity": diamond[clarity_index],
        "depth": diamond[depth_index],
        "table": diamond[table_index],
    }


# Step 1: Find the highest and lowest price per carat
max_price_per_carat, max_diamond = max(price_per_carat, key=_ratio_of)
min_price_per_carat, min_diamond = min(price_per_carat, key=_ratio_of)

# Step 2: Collect the details of both diamonds
max_combination = _describe(max_price_per_carat, max_diamond)
min_combination = _describe(min_price_per_carat, min_diamond)

# Print the results: heading first, then one "key: value" line per field
for heading, combination in (
    ("Highest price per carat:", max_combination),
    ("\nLowest price per carat:", min_combination),
):
    print(heading)
    for key, value in combination.items():
        print(f"{key}: {value}")

Highest price per carat:
price_per_carat: 17828.846153846152
color: D
clarity: IF
depth: 61.3
table: 56

Lowest price per carat:
price_per_carat: 1138.7096774193549
color: J
clarity: SI1
depth: 59.4
table: 62


In [15]:
# Question 4
# Estimate the price of the 102-carat Sotheby's diamond from an average
# price per carat and compare the estimate with the actual auction price.

# Given values
# NOTE(review): the original comment describes 6642 as the method 'a' average,
# but the Question 2 cell prints 4014.1284 for method 'a' and 4937.8122 for
# method 'b'. Confirm where this hard-coded figure comes from.
average_price_per_carat = 6642
diamond_weight = 102  # carats
actual_price = 15_700_000  # USD, Sotheby's auction

# Step 1: Estimate = weight x average price per carat
estimated_price = diamond_weight * average_price_per_carat

# Step 2: Shortfall of the estimate against the actual price
price_difference = actual_price - estimated_price

# Step 3: The same shortfall as a percentage of the actual price
percentage_difference = (price_difference / actual_price) * 100

# Output the results
report_lines = (
    f"Estimated price for the 102-carat diamond: ${estimated_price:,.2f}",
    f"Actual price from Sotheby's auction: ${actual_price:,.2f}",
    f"Difference between actual and estimated price: ${price_difference:,.2f}",
    f"Percentage difference: {percentage_difference:.2f}%",
)
for report_line in report_lines:
    print(report_line)

# Conclusion on why there is a difference
conclusion = (
    "The actual price is significantly higher due to the exceptional quality of the diamond (D-color, flawless)."
    if estimated_price < actual_price
    else "The estimated price is higher than the actual price, but this is unlikely due to the extraordinary nature of the Sotheby's diamond."
)
print(conclusion)


Estimated price for the 102-carat diamond: $677,484.00
Actual price from Sotheby's auction: $15,700,000.00
Difference between actual and estimated price: $15,022,516.00
Percentage difference: 95.68%
The actual price is significantly higher due to the exceptional quality of the diamond (D-color, flawless).
