# Implementation
Euisuh Jeong

In [1]:
import sqlite3
import pandas as pd
import time

db_name = 'esj2.db'

## Question

### Question 1: Total Sales by Years and Transaction Types
Produce a pivot table for total sales, with two dimensions, namely, (i) the years of the transactions, and (ii) the types of transactions.

In [2]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        strftime('%Y', dateTime) AS transaction_year,
        payMethod AS transaction_type,
        SUM(totalPrice) AS total_sales
    FROM TICKET_TRANSACTION
    GROUP BY transaction_year, transaction_type
'''

df1 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 5.43 seconds


In [3]:
df1.head()

Unnamed: 0,transaction_year,transaction_type,total_sales
0,1994,Cash,505335
1,1994,Credit Card,1034460
2,1994,Wire Transfer,171540
3,1995,Cash,822115
4,1995,Credit Card,1681885


### Question 2: Total Sales by Months with ROLLUP to Years and Cinema Locations
Produce a pivot table for total sales, with two dimensions, namely, (i) the months of the transactions with ROLLUP to years, and (ii) Cinema locations.

In [4]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        strftime('%Y', tt.dateTime) AS year,
        strftime('%m', tt.dateTime) AS month,
        s.address AS cinema_location,
        SUM(tt.totalPrice) AS total_sales
    FROM TICKET_TRANSACTION tt
    JOIN SESSION s ON tt.sessionId = s.id
    GROUP BY year, month, cinema_location
    ORDER BY year, month, cinema_location;
'''

df2 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 6.88 seconds


In [5]:
df2.head()

Unnamed: 0,year,month,cinema_location,total_sales
0,1994,2,122 Mckee Flat Suite 088,7120
1,1994,2,22522 Savage Rest Suite 648,7745
2,1994,2,334 Simmons Branch Suite 380,7935
3,1994,2,3419 Griffin Keys,6875
4,1994,2,360 Jessica Square,7850


### Question 3: Total Sales by Tickets and Promotion Usage
Produce a pivot table for total sales, with two dimensions, namely, (i) the tickets, and (ii) whether the tickets have promotion or not.


In [6]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        CASE 
            WHEN promotionId IS NOT NULL THEN 'With Promotion' 
            ELSE 'Without Promotion' 
        END AS promotion_status,
        COUNT(*) AS ticket_count,
        SUM(totalPrice) AS total_sales
    FROM TICKET_TRANSACTION
    GROUP BY promotion_status;
'''

df3 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 3.90 seconds


In [7]:
df3.head()

Unnamed: 0,promotion_status,ticket_count,total_sales
0,With Promotion,18696,1055445
1,Without Promotion,981304,82335820


### Question 4: Total Sales in 2018 by Customer Genders and Weekday/Weekend
Produce a pivot table on total sales in 2018, with two dimensions, namely, (i) the genders of the customers, and (ii) whether the movies are shown on weekdays or weekends.

In [8]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        CUSTOMER.gender,
        CASE 
            WHEN strftime('%w', TICKET_TRANSACTION.dateTime) IN ('0', '6') THEN 'Weekend' 
            ELSE 'Weekday' 
        END AS day_type,
        SUM(TICKET_TRANSACTION.totalPrice) AS total_sales
    FROM TICKET_TRANSACTION
    JOIN CUSTOMER ON TICKET_TRANSACTION.customerId = CUSTOMER.id
    WHERE strftime('%Y', TICKET_TRANSACTION.dateTime) = '2018'
    GROUP BY CUSTOMER.gender, day_type;
'''

df4 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 3.82 seconds


In [9]:
df4.head()

Unnamed: 0,gender,day_type,total_sales
0,F,Weekday,100743
1,F,Weekend,38914
2,M,Weekday,98988
3,M,Weekend,39582
4,U,Weekday,94801


### Question 5: Total Sales in 2018 by Customer Genders and Part of the Day
Produce a pivot table on total sales in 2018, with two dimensions, namely, (i) the genders of the customers, and (ii) whether the movie is shown in the morning, in the afternoon, or at evening

In [10]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        CUSTOMER.gender,
        CASE 
            WHEN strftime('%H', SESSION.dateTime) BETWEEN '00' AND '11' THEN 'Morning'
            WHEN strftime('%H', SESSION.dateTime) BETWEEN '12' AND '17' THEN 'Afternoon'
            ELSE 'Evening'
        END AS part_of_day,
        SUM(TICKET_TRANSACTION.totalPrice) AS total_sales
    FROM TICKET_TRANSACTION
    JOIN CUSTOMER ON TICKET_TRANSACTION.customerId = CUSTOMER.id
    JOIN SESSION ON TICKET_TRANSACTION.sessionId = SESSION.id
    WHERE strftime('%Y', TICKET_TRANSACTION.dateTime) = '2018'
    GROUP BY CUSTOMER.gender, part_of_day;
'''

df5 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 4.11 seconds


In [11]:
df5.head()

Unnamed: 0,gender,part_of_day,total_sales
0,F,Afternoon,31311
1,F,Evening,32231
2,F,Morning,76115
3,M,Afternoon,30431
4,M,Evening,34608


### Question 6: Total Number of Tickets Sold in 2018 by Customer Genders and Number of Tickets per Transaction
Produce a pivot table on total number of tickets sold in 2018, with two dimensions, namely, (i) the genders of the customers, and (ii) the numbers of tickets bought in each transaction.

In [12]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT
        C.gender AS CustomerGender,
        TT.numTickets AS NumberOfTickets,
        COUNT(*) AS TotalTicketsSold
    FROM TICKET_TRANSACTION TT
    JOIN CUSTOMER C ON TT.customerId = C.id
    WHERE strftime('%Y', TT.dateTime) = '2018'
    GROUP BY C.gender, TT.numTickets
    ORDER BY C.gender, TT.numTickets;
'''

df6 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 3.80 seconds


In [13]:
df6.head()

Unnamed: 0,CustomerGender,NumberOfTickets,TotalTicketsSold
0,F,1,350
1,F,2,306
2,F,3,362
3,F,4,322
4,F,5,348


### Question 7: Total Sales from 2012 to 2018 for Movies Directed by Mohamed Khan
Produce a pivot table on total sales from 2012 to 2018 for movies directed by Mohamed Khan, with two dimensions, namely, (i) the years of transactions, and (ii) the time of showing the movie.

In [14]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        strftime('%Y', tt.dateTime) AS transaction_year,
        strftime('%H:%M', s.dateTime) AS showing_time,
        SUM(tt.totalPrice) AS total_sales
    FROM TICKET_TRANSACTION tt
    JOIN SESSION s ON tt.sessionId = s.id
    JOIN MOVIE m ON s.movieId = m.id
    JOIN CAST c ON m.id = c.movieId
    JOIN ARTIST a ON c.artistId = a.id
    WHERE 
        a.name = 'Mohamed Khan' AND 
        c.isDirector = 1 AND 
        strftime('%Y', tt.dateTime) BETWEEN '2012' AND '2018'
    GROUP BY 
        transaction_year, 
        showing_time
    ORDER BY 
        transaction_year, 
        showing_time;
'''

df7 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 6.22 seconds


In [15]:
df7.head()

Unnamed: 0,transaction_year,showing_time,total_sales
0,2017,00:45,8575
1,2017,02:45,9500
2,2017,04:15,9735
3,2017,05:30,6770
4,2017,08:15,15875


### Question 8: Total Sales for Movies Where Omar Sharif Was Cast, by Movie Genres and Cinema States
Produce a pivot table on total sales for movies where Omar Sharif was casted in, with two dimensions, namely, (i) genres of the movies, and (ii) states in which the cinemas are located.

In [16]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        m.genre AS movie_genre,
        s.state AS cinema_state,
        SUM(tt.totalPrice) AS total_sales
    FROM TICKET_TRANSACTION tt
    JOIN SESSION s ON tt.sessionId = s.id
    JOIN MOVIE m ON s.movieId = m.id
    JOIN CAST c ON m.id = c.movieId
    JOIN ARTIST a ON c.artistId = a.id
    WHERE 
        a.name = 'Omar Sharif'
    GROUP BY 
        movie_genre, 
        cinema_state
    ORDER BY 
        movie_genre, 
        cinema_state;
'''

df8 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 12.26 seconds


In [17]:
df8.head()

Unnamed: 0,movie_genre,cinema_state,total_sales
0,Mystery,California,25245
1,Mystery,Florida,32315
2,Mystery,Georgia,78205
3,Mystery,Illinois,15385
4,Mystery,Pennsylvania,23835


### Question 9: Total Sales for Offline Transactions in 2015 by Cinema States and Hall Size
Produce a pivot table on total sales for offline transactions in 2015, with two dimensions, namely, (i) the states in which the cinemas are located, and (ii) whether the movie is shown in a small-size, mid-size, or large-size hall. (You can define your own categorization of small-size, mid-size, and large-size halls.)

In [18]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT 
        s.state AS cinema_state,
        CASE 
            WHEN s.size <= 100 THEN 'Small-size'
            WHEN s.size > 100 AND s.size <= 200 THEN 'Mid-size'
            ELSE 'Large-size'
        END AS hall_size_category,
        SUM(tt.totalPrice) AS total_sales
    FROM TICKET_TRANSACTION tt
    JOIN SESSION s ON tt.sessionId = s.id
    WHERE 
        tt.payMethod != 'Online' AND 
        strftime('%Y', tt.dateTime) = '2015'
    GROUP BY 
        cinema_state, 
        hall_size_category
    ORDER BY 
        cinema_state, 
        hall_size_category;
'''

df9 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 3.96 seconds


In [19]:
df9.head()

Unnamed: 0,cinema_state,hall_size_category,total_sales
0,California,Large-size,62685
1,California,Mid-size,128200
2,California,Small-size,123656
3,Florida,Large-size,120344
4,Florida,Mid-size,97393


### Question 10: Total Sales from 2012 to 2015 by Customer Genders and Age Groups
Produce a pivot table on the total sales from 2012 to 2015, with two dimensions, namely, (i) the genders of the customers, and (ii) the ages of the customers at the time of ticket purchase, with ROLLUP to age groups. (You can define your own categorization of age groups, e.g., [1, 10], [11, 20], [21, 30], etc.

In [20]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT
        C.gender AS Gender,
        CASE
            WHEN CAST((strftime('%Y', TT.dateTime) - strftime('%Y', C.dob)) / 10 AS INTEGER) * 10 < 20 THEN '1-20'
            WHEN CAST((strftime('%Y', TT.dateTime) - strftime('%Y', C.dob)) / 10 AS INTEGER) * 10 BETWEEN 20 AND 30 THEN '21-30'
            WHEN CAST((strftime('%Y', TT.dateTime) - strftime('%Y', C.dob)) / 10 AS INTEGER) * 10 BETWEEN 30 AND 40 THEN '31-40'
            ELSE '41+' END AS Age_Group,
        SUM(TT.totalPrice) AS Total_Sales
    FROM TICKET_TRANSACTION TT
    JOIN CUSTOMER C ON TT.customerId = C.id
    WHERE strftime('%Y', TT.dateTime) BETWEEN '2012' AND '2015'
    GROUP BY Gender, Age_Group
'''

df10 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 4.16 seconds


In [21]:
df10.head()

Unnamed: 0,Gender,Age_Group,Total_Sales
0,F,1-20,54995
1,F,21-30,97896
2,F,31-40,49933
3,F,41+,165313
4,M,1-20,56390


### Question 11: Ranking Cinemas by Total Sales in 2018 for Each City
For each city, rank the cinemas in the city in descending order of total sales in 2018.


Need to change the address of the cinema

In [22]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT
        City,
        Cinema,
        SUM(Total_Sales) AS Total_Sales,
        RANK() OVER(PARTITION BY City ORDER BY SUM(Total_Sales) DESC) AS Rank
    FROM (
        SELECT
            S.city AS City,
            S.address AS Cinema,
            TT.totalPrice AS Total_Sales
        FROM TICKET_TRANSACTION TT
        JOIN SESSION S ON TT.sessionId = S.id
        WHERE strftime('%Y', TT.dateTime) = '2018'
    ) AS Subquery
    GROUP BY City, Cinema
    ORDER BY City, Rank;
'''

df11 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 4.17 seconds


In [23]:
df11.head()

Unnamed: 0,City,Cinema,Total_Sales,Rank
0,Allentown,88034 Baker Trafficway,99205,1
1,Allentown,0794 Miller Place,63935,2
2,Allentown,3419 Griffin Keys,37965,3
3,Atlanta,049 William Parkways,107937,1
4,Atlanta,38418 Vincent Passage Suite 503,101992,2


### Question 12: Ranking Movies by Total Sales for Directors, Under-50 Audience
For each director, rank his/her movies in descending orders of total sales for customers with ages under 50 (at the time of ticket purchases).

In [24]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT
        A.name AS Director,
        M.title AS Movie,
        SUM(TT.totalPrice) AS Total_Sales,
        RANK() OVER(PARTITION BY A.name ORDER BY SUM(TT.totalPrice) DESC) AS Rank
    FROM TICKET_TRANSACTION TT
    JOIN CUSTOMER C ON TT.customerId = C.id
    JOIN SESSION S ON TT.sessionId = S.id
    JOIN MOVIE M ON S.movieId = M.id
    JOIN CAST CA ON M.id = CA.movieId
    JOIN ARTIST A ON CA.artistId = A.id
    WHERE CA.isDirector = 1
    AND (strftime('%Y', 'now') - strftime('%Y', C.dob)) < 50
    GROUP BY Director, Movie
'''

df12 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 3.79 seconds


In [25]:
df12.head()

Unnamed: 0,Director,Movie,Total_Sales,Rank
0,Aaron Ruiz,Implemented encompassing leverage,8775,1
1,Aaron Ruiz,Object-based context-sensitive standardization,7745,2
2,Aaron Ruiz,Phased content-based benchmark,7705,3
3,Aaron Ruiz,Upgradable attitude-oriented archive,7485,4
4,Aaron Ruiz,Front-line fresh-thinking time-frame,6996,5


### Question 13: Ranking Browsers by Transaction Numbers for Cinemas in Different States
Consider the online transactions made with various browsers, for cinemas in different states. For each city, rank the browsers in descending order of the total numbers of transactions made.

In [26]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    SELECT
        City,
        Browser,
        COUNT(*) AS Total_Transactions,
        RANK() OVER(PARTITION BY City ORDER BY COUNT(*) DESC) AS Rank
    FROM (
        SELECT
            S.city AS City,
            TT.browser AS Browser
        FROM TICKET_TRANSACTION TT
        JOIN SESSION s ON TT.sessionId = s.id
        WHERE TT.browser IS NOT NULL
    ) GROUP BY City, Browser
'''

df13 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 4.47 seconds


In [27]:
df13.head()

Unnamed: 0,City,Browser,Total_Transactions,Rank
0,Allentown,Chrome,31062,1
1,Allentown,Web Browser,8900,2
2,Allentown,Safari,4474,3
3,Atlanta,Chrome,39744,1
4,Atlanta,Web Browser,11520,2


### Question 14: Top 13 Movies in 2013 by Gender
Find the top 13 movies in 2013 (in terms of the total number of tickets sold) for male and female customers, respectively.

In [28]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    WITH GenderSales AS (
        SELECT
            M.title AS Movie,
            C.gender,
            COUNT(TT.id) AS Tickets_Sold
        FROM TICKET_TRANSACTION TT
        JOIN SESSION S ON TT.sessionId = S.id
        JOIN MOVIE M ON S.movieId = M.id
        JOIN CUSTOMER C ON TT.customerId = C.id
        WHERE strftime('%Y', S.dateTime) = '2013'
        GROUP BY M.title, C.gender
    ),
    RankedMovies AS (
        SELECT
            Movie,
            gender,
            Tickets_Sold,
            RANK() OVER(PARTITION BY gender ORDER BY Tickets_Sold DESC) AS Rank
        FROM GenderSales
    )
    SELECT *
    FROM RankedMovies
    WHERE Rank <= 13
    ORDER BY gender, Rank;
'''

df14 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 3.71 seconds


In [29]:
df14.head()

Unnamed: 0,Movie,gender,Tickets_Sold,Rank
0,Inverse reciprocal secured line,F,100,1
1,Open-source system-worthy middleware,F,93,2
2,Self-enabling real-time task-force,F,85,3
3,Decentralized bottom-line collaboration,F,81,4
4,Automated foreground success,F,70,5


### Question 15: Top 5 Movies by Ticket Sales for Each Cinema (2012-2018)
For each cinema, find the top 5 movies in terms of the total number of tickets sold from 2012 to 2018.

In [30]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    WITH CinemaSales AS (
        SELECT
            S.address || ', ' || S.state || ', ' || S.city  AS Cinema,
            M.title AS Movie,
            COUNT(TT.id) AS Tickets_Sold
        FROM TICKET_TRANSACTION TT
        JOIN SESSION S ON TT.sessionId = S.id
        JOIN MOVIE M ON S.movieId = M.id
        WHERE strftime('%Y', S.dateTime) BETWEEN '2012' AND '2018'
        GROUP BY Cinema, M.title
    ),
    RankedMovies AS (
        SELECT
            Cinema,
            Movie,
            Tickets_Sold,
            RANK() OVER(PARTITION BY Cinema ORDER BY Tickets_Sold DESC) AS Movie_Rank
        FROM CinemaSales
    )
    SELECT *
    FROM RankedMovies
    WHERE Movie_Rank <= 5
    ORDER BY Cinema, Movie_Rank;

'''

df15 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 0.61 seconds


In [31]:
df15.head()

Unnamed: 0,Cinema,Movie,Tickets_Sold,Movie_Rank
0,"0083 Jasmine Canyon Suite 151, Illinois, Sprin...",Stand-alone multimedia budgetary management,308,1
1,"0083 Jasmine Canyon Suite 151, Illinois, Sprin...",Cross-platform high-level open system,212,2
2,"0083 Jasmine Canyon Suite 151, Illinois, Sprin...",Horizontal optimal access,185,3
3,"0083 Jasmine Canyon Suite 151, Illinois, Sprin...",Optional solution-oriented superstructure,183,4
4,"0083 Jasmine Canyon Suite 151, Illinois, Sprin...",Optional bifurcated archive,181,5


### Question 16: 5-Week Moving Average of Total Sales in 2017
Compute the 5-week moving average of total sales, for each week in 2017.

In [32]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    WITH Sales_Weeks AS (
        SELECT
            strftime('%W', TT.dateTime) AS Week,
            SUM(TT.totalPrice) AS Total_Sales
        FROM TICKET_TRANSACTION TT
        WHERE strftime('%Y', TT.dateTime) = '2017'
        GROUP BY Week
    ),
    Moving_Averages AS (
        SELECT
            Week,
            AVG(Total_Sales) OVER(ORDER BY Week ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS Moving_Average
        FROM Sales_Weeks
    )
    SELECT * FROM Moving_Averages
'''

df16 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 4.10 seconds


In [33]:
df16.head()

Unnamed: 0,Week,Moving_Average
0,0,16556.0
1,1,48993.5
2,2,41454.333333
3,3,35419.5
4,4,34317.6


### Question 17: Largest Three 5-Week Moving Averages of Total Sales in 2014
Compute the largest three 5-week moving averages of total sales, among the weeks in 2014.

In [34]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    WITH Sales_Weeks AS (
        SELECT
            strftime('%W', TT.dateTime) AS Week,
            SUM(TT.totalPrice) AS Total_Sales
        FROM TICKET_TRANSACTION TT
        WHERE strftime('%Y', TT.dateTime) = '2014'
        GROUP BY Week
    ),
    Moving_Averages AS (
        SELECT
            Week,
            AVG(Total_Sales) OVER(ORDER BY Week ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS Moving_Average
        FROM Sales_Weeks
    )
    SELECT * FROM Moving_Averages
    ORDER BY Moving_Average DESC
    LIMIT 3
'''

df17 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 4.12 seconds


In [35]:
df17.head()

Unnamed: 0,Week,Moving_Average
0,50,116332.0
1,49,104289.0
2,29,97592.0


### Question 18: Largest 4-Week Moving Average of Total Sales for Each Cinema (2015-2018)
For each cinema, compute the largest 4-week moving average of total sales from 2015 to 2018.

In [36]:
conn = sqlite3.connect(db_name)
start = time.time()

query = '''
    WITH WeeklySales AS (
        SELECT
            S.address || ', ' || S.state || ', ' || S.city  AS Cinema,
            strftime('%Y-%W', S.dateTime) AS Week,
            SUM(TT.totalPrice) AS Total_Sales
        FROM TICKET_TRANSACTION TT
        JOIN SESSION S ON TT.sessionId = S.id
        WHERE strftime('%Y', S.dateTime) BETWEEN '2015' AND '2018'
        GROUP BY Cinema, Week
    ),
    MovingAverages AS (
        SELECT
            Cinema,
            Week,
            AVG(Total_Sales) OVER(PARTITION BY Cinema ORDER BY Week ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS Four_Week_Moving_Average
        FROM WeeklySales
    )
    SELECT Cinema, MAX(Four_Week_Moving_Average) AS Largest_Moving_Average
    FROM MovingAverages
    GROUP BY Cinema
    ORDER BY Cinema;
'''

df18 = pd.read_sql_query(query, conn)
print(f"Time taken: {time.time() - start:.2f} seconds")

conn.close()

Time taken: 5.10 seconds


In [37]:
df18.head()

Unnamed: 0,Cinema,Largest_Moving_Average
0,"0083 Jasmine Canyon Suite 151, Illinois, Sprin...",11165.0
1,"0151 Nicole Road Apt. 213, Washington, Tacoma",14286.25
2,"049 William Parkways, Georgia, Atlanta",10845.0
3,"0794 Miller Place, Pennsylvania, Allentown",9496.5
4,"1036 Edwards Flats Suite 078, Illinois, Peoria",10806.75


# Export result

In [38]:
import pandas as pd

dataframes = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18]

for i, df in enumerate(dataframes, start=1):
    file_name = f"result/q{i}_index.csv"
    df.to_csv(file_name, index=False)
