In [2]:
%load_ext sql
%sql sqlite:///final_project_database.db
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [1]:
import pandas as pd
import sqlite3

# 7 CSV dosyanızı ve hedef tablo adlarınızı tanımlayın
csv_files = {
    'cheese_production': 'final_project_data/cheese_production.csv',
    'coffee_production': 'final_project_data/coffee_production.csv',
    'egg_production': 'final_project_data/egg_production.csv',
    'honey_production': 'final_project_data/honey_production.csv',
    'milk_production': 'final_project_data/milk_production.csv',
    'yogurt_production': 'final_project_data/yogurt_production.csv',
    'state_lookup': 'final_project_data/state_lookup.csv'
}

# Veritabanı bağlantısını kurun
conn = sqlite3.connect('final_project_database.db')
print("Veritabanı bağlantısı kuruldu.")

print("Veri yükleme işlemi başlıyor...")
for table_name, file_path in csv_files.items():
    try:
        # CSV dosyasını okuyun
        df = pd.read_csv(file_path)

        # Veriyi veritabanına yazın
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        
        print(f"'{file_path}' dosyası başarıyla '{table_name}' tablosuna yüklendi.")
    
    except FileNotFoundError:
        print(f"Hata: '{file_path}' dosyası bulunamadı. Bu dosya atlanıyor.")
    except Exception as e:
        print(f"'{file_path}' yüklenirken bir hata oluştu: {e}")

# Veritabanı bağlantısını kapatın
conn.close()
print("Veritabanı bağlantısı kapatıldı.")

Veritabanı bağlantısı kuruldu.
Veri yükleme işlemi başlıyor...
'final_project_data/cheese_production.csv' dosyası başarıyla 'cheese_production' tablosuna yüklendi.
'final_project_data/coffee_production.csv' dosyası başarıyla 'coffee_production' tablosuna yüklendi.
'final_project_data/egg_production.csv' dosyası başarıyla 'egg_production' tablosuna yüklendi.
'final_project_data/honey_production.csv' dosyası başarıyla 'honey_production' tablosuna yüklendi.
'final_project_data/milk_production.csv' dosyası başarıyla 'milk_production' tablosuna yüklendi.
'final_project_data/yogurt_production.csv' dosyası başarıyla 'yogurt_production' tablosuna yüklendi.
'final_project_data/state_lookup.csv' dosyası başarıyla 'state_lookup' tablosuna yüklendi.
Veritabanı bağlantısı kapatıldı.


In [10]:
%%sql
UPDATE cheese_production SET value = REPLACE(value, ',', '');
UPDATE honey_production SET value = REPLACE(value, ',', '');
UPDATE milk_production SET value = REPLACE(value, ',', '');
UPDATE coffee_production SET value = REPLACE(value, ',', '');
UPDATE egg_production SET value = REPLACE(value, ',', '');
UPDATE yogurt_production SET value = REPLACE(value, ',', '');

 * sqlite:///final_project_database.db
7488 rows affected.
1559 rows affected.
37638 rows affected.
71 rows affected.
6327 rows affected.
149 rows affected.


[]

# Practise Questions

Question 1

Find the total milk production for the year 2023.

In [6]:
%%sql

SELECT SUM(Value) FROM milk_production WHERE Year = 2023;

 * sqlite:///final_project_database.db
Done.


SUM(Value)
35947.0


Question 2

Show coffee production data for the year 2015.

What is the total value?

In [12]:
%%sql

SELECT *
FROM coffee_production
WHERE Year=2015

 * sqlite:///final_project_database.db
Done.


Year,Period,Geo_Level,State_ANSI,Commodity_ID,Value
2015,YEAR,STATE,15,1,6600000


Question 3

Find the average honey production for the year 2022.

In [7]:
%%sql

SELECT AVG(Value) FROM honey_production WHERE Year = 2022;

 * sqlite:///final_project_database.db
Done.


AVG(Value)
140.25


Question 4

Get the state names with their corresponding ANSI codes from the state_lookup table.

What number is Iowa?

In [16]:
%%sql

SELECT * FROM state_lookup
WHERE State = "IOWA"

 * sqlite:///final_project_database.db
Done.


State,State_ANSI
IOWA,19


Question 5

Find the highest yogurt production value for the year 2022.

In [18]:
%%sql

SELECT MAX(Value) 
FROM yogurt_production
WHERE Year=2022

 * sqlite:///final_project_database.db
Done.


MAX(Value)
793256000


Question 6

Find states where both honey and milk were produced in 2022.

In [21]:
%%sql

SELECT DISTINCT T1.State_ANSI
FROM honey_production AS T1
INNER JOIN milk_production AS T2 ON T1.State_ANSI = T2.State_ANSI
WHERE T1.Year = 2022 AND T2.Year = 2022;

 * sqlite:///final_project_database.db
Done.


State_ANSI
1.0
4.0
5.0
6.0
8.0
12.0
13.0
16.0
17.0
18.0


Question 7

Find the total yogurt production for states that also produced cheese in 2022.

In [5]:
%%sql

SELECT SUM(t1.Value)
FROM yogurt_production t1
INNER JOIN cheese_production t2 ON t2.State_ANSI = t1.State_ANSI
WHERE t1.Year = 2022 AND t2.Year = 2022

 * sqlite:///final_project_database.db
Done.


SUM(t1.Value)
15210.0


# Final Project

Question 1

Can you find out the total milk production for 2023? Your manager wants this information for the yearly report.

What is the total milk production for 2023?

In [11]:
%%sql

SELECT SUM(Value) FROM milk_production WHERE Year = 2023

 * sqlite:///final_project_database.db
Done.


SUM(Value)
91812000000


Question 2

Which states had cheese production greater than 100 million in April 2023? The Cheese Department wants to focus their marketing efforts there. 

How many states are there?

In [18]:
%%sql

SELECT * FROM cheese_production
WHERE Year=2023 AND Period="APR"
GROUP BY State_ANSI
HAVING SUM(Value) > 100000000

 * sqlite:///final_project_database.db
Done.


Year,Period,Geo_Level,State_ANSI,Commodity_ID,Domain,Value
2023,APR,STATE,,6,TOTAL,215206000
2023,APR,STATE,6.0,6,TOTAL,208807000
2023,APR,STATE,55.0,6,TOTAL,289699000


Question 3

Your manager wants to know how coffee production has changed over the years. 

What is the total value of coffee production for 2011?

In [20]:
%%sql

SELECT *
FROM coffee_production
WHERE Year=2011

 * sqlite:///final_project_database.db
Done.


Year,Period,Geo_Level,State_ANSI,Commodity_ID,Value
2011,YEAR,STATE,15,1,7600000


Question 4

There's a meeting with the Honey Council next week. Find the average honey production for 2022 so you're prepared.

In [21]:
%%sql

SELECT AVG(Value)
FROM honey_production
WHERE Year=2022

 * sqlite:///final_project_database.db
Done.


AVG(Value)
3133275.0


Question 5

The State Relations team wants a list of all states names with their corresponding ANSI codes. Can you generate that list?

What is the State_ANSI code for Florida?

In [22]:
%%sql

SELECT * FROM state_lookup

 * sqlite:///final_project_database.db
Done.


State,State_ANSI
ALABAMA,1
ALASKA,2
ARIZONA,4
ARKANSAS,5
CALIFORNIA,6
COLORADO,8
CONNECTICUT,9
DELAWARE,10
FLORIDA,12
GEORGIA,13


Question 6

For a cross-commodity report, can you list all states with their cheese production values, even if they didn't produce any cheese in April of 2023?

What is the total for NEW JERSEY?

In [42]:
%%sql

SELECT
    sl.State,
    SUM(cp.Value)
FROM state_lookup AS sl
LEFT JOIN cheese_production AS cp ON sl.State_ANSI = cp.State_ANSI
WHERE cp.Year = 2023 AND cp.Period = "APR"
GROUP BY
    sl.State;

 * sqlite:///final_project_database.db
Done.


State,SUM(cp.Value)
CALIFORNIA,208807000
IDAHO,86452000
ILLINOIS,5068000
IOWA,31512000
MINNESOTA,69728000
NEW JERSEY,4889000
NEW MEXICO,79038000
NEW YORK,66256000
OHIO,20510000
PENNSYLVANIA,39420000


Question 7

Can you find the total yogurt production for states in the year 2022 which also have cheese production data from 2023? This will help the Dairy Division in their planning.

In [29]:
%%sql

SELECT SUM(T1.Value)
FROM yogurt_production AS T1
JOIN cheese_production AS T2
ON T1.State_ANSI = T2.State_ANSI
WHERE T1.Year = 2022 AND T2.Year = 2023;

 * sqlite:///final_project_database.db
Done.


SUM(T1.Value)
4684380000


List all states from state_lookup that are missing from milk_production in 2023.

How many states are there?

In [33]:
%%sql

SELECT COUNT(s.State_ANSI)
FROM state_lookup s
WHERE s.State_ANSI NOT IN (SELECT m.State_ANSI FROM milk_production m WHERE Year=2023)

 * sqlite:///final_project_database.db
Done.


COUNT(s.State_ANSI)
26


List all states with their cheese production values, including states that didn't produce any cheese in April 2023.

Did Delaware produce any cheese in April 2023?

In [38]:
%%sql

SELECT *
FROM cheese_production
LEFT JOIN state_lookup ON state_lookup.state_ANSI = cheese_production.state_ANSI
WHERE Year = 2023 AND Period="APR"

 * sqlite:///final_project_database.db
Done.


Year,Period,Geo_Level,State_ANSI,Commodity_ID,Domain,Value,State,State_ANSI_1
2023,APR,STATE,6.0,6,TOTAL,208807000,CALIFORNIA,6.0
2023,APR,STATE,16.0,6,TOTAL,86452000,IDAHO,16.0
2023,APR,STATE,17.0,6,TOTAL,5068000,ILLINOIS,17.0
2023,APR,STATE,19.0,6,TOTAL,31512000,IOWA,19.0
2023,APR,STATE,27.0,6,TOTAL,69728000,MINNESOTA,27.0
2023,APR,STATE,34.0,6,TOTAL,4889000,NEW JERSEY,34.0
2023,APR,STATE,35.0,6,TOTAL,79038000,NEW MEXICO,35.0
2023,APR,STATE,36.0,6,TOTAL,66256000,NEW YORK,36.0
2023,APR,STATE,39.0,6,TOTAL,20510000,OHIO,39.0
2023,APR,STATE,,6,TOTAL,215206000,,


Find the average coffee production for all years where the honey production exceeded 1 million.

In [41]:
%%sql

SELECT AVG(Yearly_AVG) FROM
(SELECT Year,AVG(c.Value) AS Yearly_AVG
FROM coffee_production c
GROUP BY c.Year
HAVING c.Year IN (SELECT h.Year FROM honey_production h GROUP BY h.Year HAVING AVG(h.Value) > 1000000)) AS total_avg

 * sqlite:///final_project_database.db
Done.


AVG(Yearly_AVG)
6426666.666666667
