# **NYC Flight Data**

This mini-project deals with the NYC flight dataset in R. First, we use R to obtain the dataset, as described in the assignment description. 

To export from R, use this command to create a CSV file optimized for importing into SQL. We want to exclude column names because that is easier to do here than in the BULK IMPORT, and we want string values to not be enclosed in quotation marks. (We can safely do this since upon inspection the data does not contain commas.)

```
write.table(nycflights13::airlines, file="airlines.csv", sep=',', na='', col.names=FALSE, quote=FALSE)

```

Run similar commands for the other files.

## Importing the data

To create the docker container, run the following command, with "C:\\Users\\path-to-data" substituted by the appropriate path on your machine, and `yourStrong()Password` also appropriately replaced if desired. The `-v` argument creates a bind mount so that the Docker container can access data on your machine.

```
docker run -v "C:\Users\path-to-data":/var/nycflights13 -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=yourStrong()Password" -p 1433:1433 --name nycflights --hostname nycflights -d mcr.microsoft.com/mssql/server:2019-latest

```

We want to import the tables `flights.txt`, `airlines.txt`, `airports.txt`, `flights.txt`, `planes.txt`, `weather.txt`.

In [2]:
CREATE DATABASE MyLabProject;

In [19]:
USE MyLabProject;

DROP TABLE IF EXISTS Airlines;
CREATE TABLE Airlines (
	id tinyint NOT NULL, 
    carrier CHAR(2) NOT NULL,
    name VARCHAR(100) NOT NULL
);

BULK INSERT [MyLabProject].[dbo].[Airlines]
FROM '/var/nycflights13/airlines.csv'
WITH ( 
    FIELDTERMINATOR = ',', 
    ROWTERMINATOR = '0x0d', -- your end of line character might vary
    KEEPNULLS );

In [24]:
USE MyLabProject; 

DROP TABLE IF EXISTS Planes;
CREATE TABLE Planes (
	id int NOT NULL,
    tailnum CHAR(6) NOT NULL,
    year INT,
    type VARCHAR(100) NOT NULL,
    manufacturer VARCHAR(100),
    model VARCHAR(100),
    engines INT,
    seats INT,
    speed INT, 
    engine VARCHAR(100)
);

BULK INSERT [MyLabProject].[dbo].[Planes]
FROM '/var/nycflights13/planes.csv'
WITH ( 
    FIELDTERMINATOR = ',', 
    ROWTERMINATOR = '0x0d', -- your end of line character might vary
    KEEPNULLS );

In [27]:
USE MyLabProject; 

DROP TABLE IF EXISTS Weather;
CREATE TABLE Weather (
    id INT NOT NULL, 
    origin CHAR(5), 
    year INT,
    month INT,
    day INT,
    hour INT,
    temp FLOAT,
    dewp FLOAT,
    humid FLOAT,
    wind_dir INT,
    wind_speed FLOAT,
    wind_gust FLOAT,
    precip FLOAT,
    pressure FLOAT,
    visib FLOAT,
    time_hour DATETIME
);

BULK INSERT [MyLabProject].[dbo].[Weather]
FROM '/var/nycflights13/weather.csv'
WITH ( 
    FIELDTERMINATOR = ',', 
    ROWTERMINATOR = '0x0d', -- your end of line character might vary
    KEEPNULLS );

In [26]:
USE MyLabProject; 

DROP TABLE IF EXISTS Flights;
CREATE TABLE Flights (
    id INT NOT NULL,
    year INT NOT NULL,
    month INT NOT NULL,
    day INT NOT NULL,
    dep_time INT,
    sched_dep_time INT, -- actual flight departure time, provided as HHMM or HMM, ex. 529 or 1100
    dep_delay INT, 
    arr_time INT,
    sched_arr_time INT,
    arr_delay INT,
    carrier CHAR(4),
    flight INT,
    tailnum CHAR(8),
    origin CHAR(5),
    dest CHAR(5),
    air_time INT,
    distance INT,
    hour INT,
    minute INT,
    time_hour DATETIME
);

BULK INSERT [MyLabProject].[dbo].[Flights]
FROM '/var/nycflights13/flights.csv'
WITH ( 
    FIELDTERMINATOR = ',', 
    ROWTERMINATOR = '0x0d', -- your end of line character might vary
    KEEPNULLS );

In [28]:
USE MyLabProject;

DROP TABLE IF EXISTS Airports;
CREATE TABLE Airports (
    id VARCHAR(100) NOT NULL, -- each row is numbered as a string
    faa CHAR(5),
    name VARCHAR(100),
    lat FLOAT,
    lon FLOAT,
    alt INT,
    tz INT,
    dst CHAR(3), -- whether the airport was in daylight savings: "A" for yes, "N" for no, "U" for unknown
    timezone VARCHAR(100) -- IANA time zone,  ex. "America/New_York"
);

BULK INSERT [MyLabProject].[dbo].[Airports]
FROM '/var/nycflights13/airports.csv'
WITH ( 
    FIELDTERMINATOR = ',', 
    ROWTERMINATOR = '0x0d', -- your end of line character might vary
    KEEPNULLS );

## Running some queries!

Now we should be ready to answer some questions about the data.

In [29]:
USE MyLabProject;

-- query for task 1
SELECT TOP (1) tailnum, count(tailnum) AS count 
    FROM [MyLabProject].[dbo].[Flights]
    group by tailnum
    order by count desc
;

-- query for task 2
SELECT TOP (1) tailnum, SUM(distance) AS total_miles
    FROM [MyLabProject].[dbo].[Flights]
    WHERE tailnum IS NOT NULL
    group by tailnum
    order by total_miles desc
;

-- query for task 3
SELECT TOP (1) a.name, AVG(w.temp) as avg_temp
    FROM [MyLabProject].[dbo].[Flights] as f 
    JOIN [MyLabProject].[dbo].[Weather] as w
    ON w.time_hour = f.time_hour
    JOIN [MyLabProject].[dbo].[Airlines] as a 
    ON f.carrier = a.carrier
    group by a.name
    order by avg_temp desc
;

tailnum,count
N725MQ,575


tailnum,total_miles
N328AA,939101


name,avg_temp
SkyWest Airlines Inc.,68.43499999999999
