In [21]:
import pandas as pd
import numpy as np


### 태양풍 데이터 전처리

- 날짜 기준으로 데이터 전처리 : 시간 기준의 데이터를 날짜별 평균으로 처리


- column별 데이터의 의미

    - date: 데이터가 관측된 날짜와 시간입니다.

    - bt: 자기장의 총 세기 (Total Magnetic Field Strength) 입니다. 단위는 보통 나노테슬라(nT)를 사용하며, 이 값이 클수록 자기장 자체가 강하다는 의미입니다.

    - theta_gse / phi_gse: GSE 좌표계를 기준으로 자기장 벡터의 방향을 나타내는 각도입니다.

    - theta_gse (세타): 위도 각도. Z축(황도면에 수직인 방향)과의 각도입니다.

    - phi_gse (파이): 경도 각도. X축(지구-태양 방향)에서 XY평면을 따라 측정한 각도입니다.

    - theta_gsm / phi_gsm: GSM 좌표계를 기준으로 자기장 벡터의 방향을 나타내는 각도입니다.

    - theta_gsm (세타): 위도 각도.

    - phi_gsm (파이): 경도 각도.

In [66]:
# mag_org.csv 파일 불러오기
df_mag = pd.read_csv('/home/roni/dev_ws/ufo_eda/data/mag_org.csv')

In [67]:
df_mag.dtypes
# 데이터 타입 확인 -> date column 을 datetime 타입으로 변경 필요

date          object
bt           float64
theta_gse    float64
phi_gse      float64
theta_gsm    float64
phi_gsm      float64
dtype: object

In [68]:
# date column 을 날짜와 시간으로 분리
# 나중에 날짜 기준으로 groupby하여 평균값으로 날짜 기준 데이터로 만들기 위함

list_date = []
list_time = []

for date in df_mag['date']:
    only_date = date.split(' ')[0]
    only_time = date.split(' ')[1]
    
    list_date.append(only_date)
    list_time.append(only_time)
    
    
df_mag['date'] = list_date
df_mag['time'] = list_time
    
    

In [69]:
df_mag['date'] = pd.to_datetime(df_mag['date'])
df_mag['time'] = pd.to_datetime(df_mag['time'])

  df_mag['time'] = pd.to_datetime(df_mag['time'])


In [70]:
# 날짜 기준으로 평균을 내서 날짜 기준 데이터로 변경
df_mag = df_mag.groupby(df_mag['date'], as_index = False).mean(numeric_only = True)

In [71]:
df_mag = df_mag.drop(columns = 'time')

KeyError: "['time'] not found in axis"

In [72]:
df_mag

Unnamed: 0,date,bt,theta_gse,phi_gse,theta_gsm,phi_gsm
0,2018-11-08,4.201618,1.639833,124.072722,-7.467917,126.339301
1,2018-11-09,5.654160,-18.319604,203.009965,-18.635729,221.180469
2,2018-11-10,8.013868,1.358437,286.612578,10.584306,283.317374
3,2018-11-11,4.885163,5.908899,279.866334,13.607603,272.854985
4,2018-11-12,4.114291,-4.749343,231.083344,-0.697394,225.140376
...,...,...,...,...,...,...
1626,2023-11-05,19.863812,-1.501554,220.622625,6.811093,232.189722
1627,2023-11-06,11.346820,-9.671675,241.561084,-1.399630,237.211461
1628,2023-11-07,6.045146,9.152118,206.763660,10.159028,193.063896
1629,2023-11-08,5.563336,12.647262,258.011492,19.430773,232.937846


#### 태양풍 데이터 데이터 베이스 저장

In [73]:
import mysql.connector
import os
from dotenv import load_dotenv
from mysql.connector import Error

In [74]:
load_dotenv()
aws_db_ufo_pw = os.getenv('aws_db_ufo_pw')
aws_host_ufo = os.getenv('aws_host_ufo')


In [80]:


remote = mysql.connector.connect(
    host = aws_host_ufo,
    user = 'EDA',
    port = 3306,  # default 는 3306
    password = aws_db_ufo_pw,
    database = 'Frisbee'

)

cursor = remote.cursor()



In [76]:
cursor.close()

True

In [81]:
df_mag

Unnamed: 0,date,bt,theta_gse,phi_gse,theta_gsm,phi_gsm
0,2018-11-08,4.201618,1.639833,124.072722,-7.467917,126.339301
1,2018-11-09,5.654160,-18.319604,203.009965,-18.635729,221.180469
2,2018-11-10,8.013868,1.358437,286.612578,10.584306,283.317374
3,2018-11-11,4.885163,5.908899,279.866334,13.607603,272.854985
4,2018-11-12,4.114291,-4.749343,231.083344,-0.697394,225.140376
...,...,...,...,...,...,...
1626,2023-11-05,19.863812,-1.501554,220.622625,6.811093,232.189722
1627,2023-11-06,11.346820,-9.671675,241.561084,-1.399630,237.211461
1628,2023-11-07,6.045146,9.152118,206.763660,10.159028,193.063896
1629,2023-11-08,5.563336,12.647262,258.011492,19.430773,232.937846


In [82]:
table_name = 'mag_strength_data'

query_create_table = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            date DATETIME,
            bt FLOAT,
            theta_gse FLOAT,
            phi_gse FLOAT,
            theta_gsm FLOAT,
            phi_gsm FLOAT
        )
        """
        
cursor.execute(query_create_table)
remote.commit()
print(f"테이블 '{table_name}'이(가) 성공적으로 생성되었거나 이미 존재합니다.")


테이블 'mag_strength_data'이(가) 성공적으로 생성되었거나 이미 존재합니다.


In [83]:

for idx, row in df_mag.iterrows():

    query = f"""
    INSERT INTO {table_name} (date, bt, theta_gse, phi_gse, theta_gsm, phi_gsm)
    VALUES (%s,%s,%s,%s,%s,%s)
    """

    cursor.execute(query, tuple(row))
    
    
remote.commit()
print(f"{cursor.rowcount} 개의 ROW가 {table_name} 테이블에 성공적으로 삽입 되었습니다")

1 개의 ROW가 mag_strength_data 테이블에 성공적으로 삽입 되었습니다


In [186]:
df_mag.to_csv('/home/roni/dev_ws/ufo_eda/data/mag_org_pp.csv')

### UFO data Preprocess

In [146]:
# ufo_data load
df_ufo = pd.read_csv('/home/roni/dev_ws/ufo_eda/data/ufo/complete.csv', on_bad_lines='skip')

  df_ufo = pd.read_csv('/home/roni/dev_ws/ufo_eda/data/ufo/complete.csv', on_bad_lines='skip')


In [147]:
# data type 확인
df_ufo.dtypes

datetime                 object
city                     object
state                    object
country                  object
shape                    object
duration (seconds)       object
duration (hours/min)     object
comments                 object
date posted              object
latitude                 object
longitude               float64
dtype: object

In [148]:
# datetime 날짜와 시간 분리
list_date = []
list_time = []
for dt in df_ufo['datetime']:
    only_date = dt.split(' ')[0]
    only_time = dt.split(' ')[1]
    
    list_date.append(only_date)
    list_time.append(only_time)
    
# 데이터 프레임에 date 컬럼과 time 컬럼을 만들어서 나누어진 데이터 적용
df_ufo_date_splited = df_ufo
df_ufo_date_splited['date'] = list_date
df_ufo_date_splited['time'] = list_time
    

In [149]:
new_col = ['date', 'time', 'datetime', 'city', 'state', 'country', 'shape', 'duration (seconds)', 'duration (hours/min)', 'comments', 'date posted', 'latitude', 'longitude']

In [150]:
# 컬럼 순서 바꾸기

new_col = ['date', 'time', 'datetime', 'city', 'state', 'country',
        'shape', 'duration (seconds)', 'duration (hours/min)', 
        'comments', 'date posted', 'latitude', 'longitude']

df_ufo_f = df_ufo_date_splited[new_col]

# 기존 datetime 컬럼 제거
df_ufo_f = df_ufo_f.drop(columns= 'datetime')


In [151]:
# 국가 미국만 추철
df_ufo_us = df_ufo_f.loc[df_ufo_f['country'] == 'us']
df_ufo_us.reset_index(drop = True)

Unnamed: 0,date,time,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949,20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1956,21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
2,10/10/1960,20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
3,10/10/1961,19:00,bristol,tn,us,sphere,300,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.5950000,-82.188889
4,10/10/1965,23:45,norwalk,ct,us,disk,1200,20 minutes,A bright orange color changing to reddish colo...,10/2/1999,41.1175000,-73.408333
...,...,...,...,...,...,...,...,...,...,...,...,...
70288,9/9/2013,22:00,napa,ca,us,other,1200.0,hour,Napa UFO&#44,9/30/2013,38.297222,-122.284444
70289,9/9/2013,22:20,vienna,va,us,circle,5.0,5 seconds,Saw a five gold lit cicular craft moving fastl...,9/30/2013,38.901111,-77.265556
70290,9/9/2013,23:00,edmond,ok,us,cigar,1020.0,17 minutes,2 witnesses 2 miles apart&#44 Red &amp; White...,9/30/2013,35.652778,-97.477778
70291,9/9/2013,23:00,starr,sc,us,diamond,0.0,2 nights,On September ninth my wife and i noticed stran...,9/30/2013,34.376944,-82.695833


In [152]:
df_ufo_us.dtypes

date                     object
time                     object
city                     object
state                    object
country                  object
shape                    object
duration (seconds)       object
duration (hours/min)     object
comments                 object
date posted              object
latitude                 object
longitude               float64
dtype: object

In [153]:
df_ufo_us['date'] = pd.to_datetime(df_ufo_us['date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ufo_us['date'] = pd.to_datetime(df_ufo_us['date'])


In [154]:
df_ufo_us

Unnamed: 0,date,time,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,1949-10-10,20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
3,1956-10-10,21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,1960-10-10,20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
5,1961-10-10,19:00,bristol,tn,us,sphere,300,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.5950000,-82.188889
7,1965-10-10,23:45,norwalk,ct,us,disk,1200,20 minutes,A bright orange color changing to reddish colo...,10/2/1999,41.1175000,-73.408333
...,...,...,...,...,...,...,...,...,...,...,...,...
88674,2013-09-09,22:00,napa,ca,us,other,1200.0,hour,Napa UFO&#44,9/30/2013,38.297222,-122.284444
88675,2013-09-09,22:20,vienna,va,us,circle,5.0,5 seconds,Saw a five gold lit cicular craft moving fastl...,9/30/2013,38.901111,-77.265556
88676,2013-09-09,23:00,edmond,ok,us,cigar,1020.0,17 minutes,2 witnesses 2 miles apart&#44 Red &amp; White...,9/30/2013,35.652778,-97.477778
88677,2013-09-09,23:00,starr,sc,us,diamond,0.0,2 nights,On September ninth my wife and i noticed stran...,9/30/2013,34.376944,-82.695833


In [155]:
df_ufo_us = df_ufo_us.sort_values(by = 'date', ascending = True)
df_ufo_us = df_ufo_us.reset_index(drop = True)
df_ufo_us

Unnamed: 0,date,time,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,1910-01-01,24:00,kirksville (near),mo,us,disk,120,minutes,Historical sighting (1903 - 1913) Northern Mis...,9/15/2005,40.1947222,-92.583056
1,1910-05-28,21:00,solon,me,us,unknown,0,don&#39t know,entry in my great-grandmother&#39s diary&#44da...,12/5/2001,44.9494444,-69.858889
2,1910-06-01,15:00,wills point,tx,us,cigar,120,2 minutes,Cigar shaped object moving from West to East,4/16/2005,32.7091667,-96.008056
3,1920-06-11,21:00,cicero,in,us,unknown,60,1 minute,((NUFORC Note: Probable hoax. Note date. PD...,5/12/2009,40.1238889,-86.013333
4,1925-12-28,18:00,atkinson (6 miles north of),il,us,disk,60,1 minute,Young boy witnesses disc in sky above Illinois...,5/11/2005,41.4208333,-90.015000
...,...,...,...,...,...,...,...,...,...,...,...,...
70288,2014-05-07,09:30,port richey,fl,us,unknown,1,1 second,Sonic boom denied by military source per McDil...,5/8/2014,28.2713889,-82.719722
70289,2014-05-07,03:30,calera,al,us,circle,240,3-4 minutes,Orange/red sphere with blue or green outline o...,5/8/2014,33.1027778,-86.753611
70290,2014-05-07,00:00,detroit,mi,us,fireball,180,3 minutes,Fire balls in detroit sky.,5/8/2014,42.3313889,-83.045833
70291,2014-05-07,20:30,seagrove beach,fl,us,fireball,60,<1 minute,Around 8:30 pm I was on the balcony of our con...,5/8/2014,30.3183333,-86.130278


In [156]:
df_ufo_us_mod = df_ufo_us.loc[df_ufo_us['date'] > '1994-12-31']

In [157]:
df_ufo_us_mod.reset_index(drop = True)

Unnamed: 0,date,time,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,1995-01-01,02:00,el granada,ca,us,fireball,45,45 seconds,Glowing fireball object crashed at Maverick&#...,11/4/2002,37.5027778,-122.468333
1,1995-01-01,15:00,cedar key,fl,us,triangle,180,3 minutes,I could hear and see it&#44 close encounter.,8/16/2002,29.1383333,-83.035278
2,1995-01-01,15:00,greenwood,sc,us,sphere,20,20 seconds,Bright red sphere floating and completely sile...,7/14/2013,34.1952778,-82.161944
3,1995-01-01,21:00,tucson,az,us,triangle,600,5-10 minutes,120-140 ft&#44 X 35-40 ft&#44 X 30 ft&#44 tria...,3/7/1998,32.2216667,-110.925833
4,1995-01-01,00:02,pacific grove,ca,us,,0,,Rept. that local police had pursued a strange ...,11/2/1999,36.6177778,-121.915556
...,...,...,...,...,...,...,...,...,...,...,...,...
63782,2014-05-07,09:30,port richey,fl,us,unknown,1,1 second,Sonic boom denied by military source per McDil...,5/8/2014,28.2713889,-82.719722
63783,2014-05-07,03:30,calera,al,us,circle,240,3-4 minutes,Orange/red sphere with blue or green outline o...,5/8/2014,33.1027778,-86.753611
63784,2014-05-07,00:00,detroit,mi,us,fireball,180,3 minutes,Fire balls in detroit sky.,5/8/2014,42.3313889,-83.045833
63785,2014-05-07,20:30,seagrove beach,fl,us,fireball,60,<1 minute,Around 8:30 pm I was on the balcony of our con...,5/8/2014,30.3183333,-86.130278


In [158]:
df_ufo_us_mod['shape'] = df_ufo_us_mod['shape'].fillna('unknown')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ufo_us_mod['shape'] = df_ufo_us_mod['shape'].fillna('unknown')


In [159]:
df_ufo_us_mod = df_ufo_us_mod.drop(columns = ['duration (hours/min)','date posted', 'country'])

In [160]:
df_ufo_us_mod

Unnamed: 0,date,time,city,state,shape,duration (seconds),comments,latitude,longitude
6506,1995-01-01,02:00,el granada,ca,fireball,45,Glowing fireball object crashed at Maverick&#...,37.5027778,-122.468333
6507,1995-01-01,15:00,cedar key,fl,triangle,180,I could hear and see it&#44 close encounter.,29.1383333,-83.035278
6508,1995-01-01,15:00,greenwood,sc,sphere,20,Bright red sphere floating and completely sile...,34.1952778,-82.161944
6509,1995-01-01,21:00,tucson,az,triangle,600,120-140 ft&#44 X 35-40 ft&#44 X 30 ft&#44 tria...,32.2216667,-110.925833
6510,1995-01-01,00:02,pacific grove,ca,unknown,0,Rept. that local police had pursued a strange ...,36.6177778,-121.915556
...,...,...,...,...,...,...,...,...,...
70288,2014-05-07,09:30,port richey,fl,unknown,1,Sonic boom denied by military source per McDil...,28.2713889,-82.719722
70289,2014-05-07,03:30,calera,al,circle,240,Orange/red sphere with blue or green outline o...,33.1027778,-86.753611
70290,2014-05-07,00:00,detroit,mi,fireball,180,Fire balls in detroit sky.,42.3313889,-83.045833
70291,2014-05-07,20:30,seagrove beach,fl,fireball,60,Around 8:30 pm I was on the balcony of our con...,30.3183333,-86.130278


#### UFO data 데이터 베이스 저장

In [170]:
import mysql.connector
import os
from dotenv import load_dotenv
from mysql.connector import Error

In [171]:
load_dotenv()
aws_db_ufo_pw = os.getenv('aws_db_ufo_pw')
aws_host_ufo = os.getenv('aws_host_ufo')


In [177]:


remote = mysql.connector.connect(
    host = aws_host_ufo,
    user = 'EDA',
    port = 3306,  # default 는 3306
    password = aws_db_ufo_pw,
    database = 'Frisbee'

)

cursor = remote.cursor()



In [173]:
cursor.close()

True

In [178]:
df_ufo_us_mod

Unnamed: 0,date,time,city,state,shape,duration (seconds),comments,latitude,longitude
6506,1995-01-01,02:00,el granada,ca,fireball,45,Glowing fireball object crashed at Maverick&#...,37.5027778,-122.468333
6507,1995-01-01,15:00,cedar key,fl,triangle,180,I could hear and see it&#44 close encounter.,29.1383333,-83.035278
6508,1995-01-01,15:00,greenwood,sc,sphere,20,Bright red sphere floating and completely sile...,34.1952778,-82.161944
6509,1995-01-01,21:00,tucson,az,triangle,600,120-140 ft&#44 X 35-40 ft&#44 X 30 ft&#44 tria...,32.2216667,-110.925833
6510,1995-01-01,00:02,pacific grove,ca,unknown,0,Rept. that local police had pursued a strange ...,36.6177778,-121.915556
...,...,...,...,...,...,...,...,...,...
70288,2014-05-07,09:30,port richey,fl,unknown,1,Sonic boom denied by military source per McDil...,28.2713889,-82.719722
70289,2014-05-07,03:30,calera,al,circle,240,Orange/red sphere with blue or green outline o...,33.1027778,-86.753611
70290,2014-05-07,00:00,detroit,mi,fireball,180,Fire balls in detroit sky.,42.3313889,-83.045833
70291,2014-05-07,20:30,seagrove beach,fl,fireball,60,Around 8:30 pm I was on the balcony of our con...,30.3183333,-86.130278


In [182]:
df_ufo_us_mod.dropna(inplace = True)

In [183]:
df_ufo_us_mod

Unnamed: 0,date,time,city,state,shape,duration (seconds),comments,latitude,longitude
6506,1995-01-01,02:00,el granada,ca,fireball,45,Glowing fireball object crashed at Maverick&#...,37.5027778,-122.468333
6507,1995-01-01,15:00,cedar key,fl,triangle,180,I could hear and see it&#44 close encounter.,29.1383333,-83.035278
6508,1995-01-01,15:00,greenwood,sc,sphere,20,Bright red sphere floating and completely sile...,34.1952778,-82.161944
6509,1995-01-01,21:00,tucson,az,triangle,600,120-140 ft&#44 X 35-40 ft&#44 X 30 ft&#44 tria...,32.2216667,-110.925833
6510,1995-01-01,00:02,pacific grove,ca,unknown,0,Rept. that local police had pursued a strange ...,36.6177778,-121.915556
...,...,...,...,...,...,...,...,...,...
70288,2014-05-07,09:30,port richey,fl,unknown,1,Sonic boom denied by military source per McDil...,28.2713889,-82.719722
70289,2014-05-07,03:30,calera,al,circle,240,Orange/red sphere with blue or green outline o...,33.1027778,-86.753611
70290,2014-05-07,00:00,detroit,mi,fireball,180,Fire balls in detroit sky.,42.3313889,-83.045833
70291,2014-05-07,20:30,seagrove beach,fl,fireball,60,Around 8:30 pm I was on the balcony of our con...,30.3183333,-86.130278


In [184]:
table_name = 'UFO_us'

query_create_table = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            date DATETIME,
            time DATETIME,
            city CHAR(16),
            state CHAR(4),
            shape CHAR(16),
            duration INT,
            comments TEXT,
            latitude FLOAT,
            longitude FLOAT
        )
        """
        
cursor.execute(query_create_table)
remote.commit()
print(f"테이블 '{table_name}'이(가) 성공적으로 생성되었거나 이미 존재합니다.")


테이블 'UFO_us'이(가) 성공적으로 생성되었거나 이미 존재합니다.


In [185]:

for idx, row in df_ufo_us_mod.iterrows():

    query = f"""
    INSERT INTO {table_name} (date, time, city, state, shape, duration, comments, latitude, longitude)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """

    cursor.execute(query, tuple(row))
    
    
remote.commit()
print(f"{cursor.rowcount} 개의 ROW가 {table_name} 테이블에 성공적으로 삽입 되었습니다")

1 개의 ROW가 UFO_us 테이블에 성공적으로 삽입 되었습니다


In [187]:
df_ufo_us_mod.to_csv('/home/roni/dev_ws/ufo_eda/data/ufo_us.csv')

In [188]:
df_mag

Unnamed: 0,date,bt,theta_gse,phi_gse,theta_gsm,phi_gsm
0,2018-11-08,4.201618,1.639833,124.072722,-7.467917,126.339301
1,2018-11-09,5.654160,-18.319604,203.009965,-18.635729,221.180469
2,2018-11-10,8.013868,1.358437,286.612578,10.584306,283.317374
3,2018-11-11,4.885163,5.908899,279.866334,13.607603,272.854985
4,2018-11-12,4.114291,-4.749343,231.083344,-0.697394,225.140376
...,...,...,...,...,...,...
1626,2023-11-05,19.863812,-1.501554,220.622625,6.811093,232.189722
1627,2023-11-06,11.346820,-9.671675,241.561084,-1.399630,237.211461
1628,2023-11-07,6.045146,9.152118,206.763660,10.159028,193.063896
1629,2023-11-08,5.563336,12.647262,258.011492,19.430773,232.937846


In [193]:
df_ufo_us = df_ufo_us_mod.reset_index(drop=True)
df_ufo_us.to_csv('/home/roni/dev_ws/ufo_eda/data/ufo_us.csv')

In [198]:


us_state_to_name_lower = {
    'ak': 'alaska', 'al': 'alabama', 'ar': 'arkansas', 'az': 'arizona',
    'ca': 'california', 'co': 'colorado', 'ct': 'connecticut', 'dc': 'district of columbia',
    'de': 'delaware', 'fl': 'florida', 'ga': 'georgia', 'hi': 'hawaii',
    'ia': 'iowa', 'id': 'idaho', 'il': 'illinois', 'in': 'indiana',
    'ks': 'kansas', 'ky': 'kentucky', 'la': 'louisiana', 'ma': 'massachusetts',
    'md': 'maryland', 'me': 'maine', 'mi': 'michigan', 'mn': 'minnesota',
    'mo': 'missouri', 'ms': 'mississippi', 'mt': 'montana', 'nc': 'north carolina',
    'nd': 'north dakota', 'ne': 'nebraska', 'nh': 'new hampshire', 'nj': 'new jersey',
    'nm': 'new mexico', 'nv': 'nevada', 'ny': 'new york', 'oh': 'ohio',
    'ok': 'oklahoma', 'or': 'oregon', 'pa': 'pennsylvania', 'ri': 'rhode island',
    'sc': 'south carolina', 'sd': 'south dakota', 'tn': 'tennessee', 'tx': 'texas',
    'ut': 'utah', 'va': 'virginia', 'vt': 'vermont', 'wa': 'washington',
    'wi': 'wisconsin', 'wv': 'west virginia', 'wy': 'wyoming'
}

In [201]:
df_ufo_us['state_name'] = df_ufo_us['state'].map(us_state_to_name_lower)

In [202]:
df_ufo_us

Unnamed: 0,date,time,city,state,shape,duration (seconds),comments,latitude,longitude,state_name
0,1995-01-01,02:00,el granada,ca,fireball,45,Glowing fireball object crashed at Maverick&#...,37.5027778,-122.468333,california
1,1995-01-01,15:00,cedar key,fl,triangle,180,I could hear and see it&#44 close encounter.,29.1383333,-83.035278,florida
2,1995-01-01,15:00,greenwood,sc,sphere,20,Bright red sphere floating and completely sile...,34.1952778,-82.161944,south carolina
3,1995-01-01,21:00,tucson,az,triangle,600,120-140 ft&#44 X 35-40 ft&#44 X 30 ft&#44 tria...,32.2216667,-110.925833,arizona
4,1995-01-01,00:02,pacific grove,ca,unknown,0,Rept. that local police had pursued a strange ...,36.6177778,-121.915556,california
...,...,...,...,...,...,...,...,...,...,...
63756,2014-05-07,09:30,port richey,fl,unknown,1,Sonic boom denied by military source per McDil...,28.2713889,-82.719722,florida
63757,2014-05-07,03:30,calera,al,circle,240,Orange/red sphere with blue or green outline o...,33.1027778,-86.753611,alabama
63758,2014-05-07,00:00,detroit,mi,fireball,180,Fire balls in detroit sky.,42.3313889,-83.045833,michigan
63759,2014-05-07,20:30,seagrove beach,fl,fireball,60,Around 8:30 pm I was on the balcony of our con...,30.3183333,-86.130278,florida


In [203]:
df_ufo_us_state = df_ufo_us.groupby(by = 'state_name').size()

In [204]:
df_ufo_us_state

state_name
alabama                  616
alaska                   302
arizona                 2504
arkansas                 569
california              8748
colorado                1418
connecticut              846
delaware                 158
district of columbia       7
florida                 3863
georgia                 1234
hawaii                   249
idaho                    525
illinois                2484
indiana                 1261
iowa                     661
kansas                   586
kentucky                 811
louisiana                517
maine                    521
maryland                 797
massachusetts           1183
michigan                1759
minnesota                994
mississippi              342
missouri                1410
montana                  484
nebraska                 365
nevada                   800
new hampshire            463
new jersey              1196
new mexico               713
new york                2802
north carolina          1703
nor

### Space weather

#### 전처리

In [1]:
import pandas as pd

In [24]:
df_sw = pd.read_csv('/home/roni/dev_ws/ufo_eda/data/space_weather.csv')

##### 데이터 정보
컬럼 설명

- DATE: 관측 날짜 (예: 1957-10-01)

- BSRN: 바텔스 태양 회전 번호 (태양의 자전 주기를 기준으로 한 번호)

- ND: 바텔스 회전 주기 내에서의 일(day) 번호

- KP1 ~ KP8: 하루를 3시간 간격으로 나눈 8개의 구간에 대한 Kp 지수 (지구 자기장 활동을 나타내는 지수)

- KP_SUM: 8개 구간의 Kp 지수 일일 합계

- AP1 ~ AP8: 하루를 3시간 간격으로 나눈 8개의 구간에 대한 Ap 지수 (Kp와 유사한 지구 자기장 활동 지수)

- AP_AVG: 8개 구간의 Ap 지수 일일 평균

- CP: Cp 지수 (지구 자기장 활동 관련 지수)

- C9: C9 지수 (지구 자기장 활동 관련 지수)

- ISN: 국제 흑점 수 (태양 활동의 지표)

- F10.7_OBS: 관측된 10.7cm 파장의 태양 전파 플럭스 (태양 활동의 주요 지표)

- F10.7_ADJ: 1 천문단위(AU) 거리 기준으로 보정된 F10.7 플럭스 값

- F10.7_DATA_TYPE: F10.7 데이터 유형 (예: 'OBS' - 관측값)

- F10.7_OBS_CENTER81: 81일 중심 이동 평균을 적용한 관측 F10.7 플럭스

- F10.7_OBS_LAST81: 81일 후행 이동 평균을 적용한 관측 F10.7 플럭스

- F10.7_ADJ_CENTER81: 81일 중심 이동 평균을 적용한 보정 F10.7 플럭스

- F10.7_ADJ_LAST81: 81일 후행 이동 평균을 적용한 보정 F10.7 플럭스





In [25]:
df_sw

Unnamed: 0,DATE,BSRN,ND,KP1,KP2,KP3,KP4,KP5,KP6,KP7,...,CP,C9,ISN,F10.7_OBS,F10.7_ADJ,F10.7_DATA_TYPE,F10.7_OBS_CENTER81,F10.7_OBS_LAST81,F10.7_ADJ_CENTER81,F10.7_ADJ_LAST81
0,1957-10-01,1700,19,43.0,40.0,30.0,20.0,37.0,23.0,43.0,...,1.1,5.0,334,269.3,269.8,OBS,266.6,230.9,266.8,235.5
1,1957-10-02,1700,20,37.0,37.0,17.0,17.0,27.0,23.0,17.0,...,0.7,3.0,331,253.3,253.6,OBS,267.4,231.7,267.5,236.2
2,1957-10-03,1700,21,27.0,20.0,13.0,33.0,37.0,47.0,43.0,...,1.0,5.0,343,266.3,266.4,OBS,268.1,232.7,268.1,237.1
3,1957-10-04,1700,22,30.0,30.0,23.0,27.0,23.0,27.0,30.0,...,0.7,3.0,307,238.2,238.2,OBS,268.8,233.3,268.7,237.7
4,1957-10-05,1700,23,30.0,30.0,17.0,23.0,20.0,27.0,27.0,...,0.6,3.0,310,246.2,246.0,OBS,269.3,233.9,269.1,238.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25087,2041-06-01,2832,14,,,,,,,,...,,,13,69.2,71.1,PRM,69.5,71.1,71.3,71.7
25088,2041-07-01,2833,17,,,,,,,,...,,,12,68.5,70.8,PRM,68.8,69.8,71.0,71.4
25089,2041-08-01,2834,21,,,,,,,,...,,,11,68.4,70.5,PRM,68.7,69.0,70.6,71.1
25090,2041-09-01,2835,25,,,,,,,,...,,,11,68.9,70.2,PRM,69.2,68.7,70.4,70.7


In [26]:
df_sw.dtypes

DATE                   object
BSRN                    int64
ND                      int64
KP1                   float64
KP2                   float64
KP3                   float64
KP4                   float64
KP5                   float64
KP6                   float64
KP7                   float64
KP8                   float64
KP_SUM                float64
AP1                   float64
AP2                   float64
AP3                   float64
AP4                   float64
AP5                   float64
AP6                   float64
AP7                   float64
AP8                   float64
AP_AVG                float64
CP                    float64
C9                    float64
ISN                     int64
F10.7_OBS             float64
F10.7_ADJ             float64
F10.7_DATA_TYPE        object
F10.7_OBS_CENTER81    float64
F10.7_OBS_LAST81      float64
F10.7_ADJ_CENTER81    float64
F10.7_ADJ_LAST81      float64
dtype: object

In [27]:
df_sw.columns

Index(['DATE', 'BSRN', 'ND', 'KP1', 'KP2', 'KP3', 'KP4', 'KP5', 'KP6', 'KP7',
       'KP8', 'KP_SUM', 'AP1', 'AP2', 'AP3', 'AP4', 'AP5', 'AP6', 'AP7', 'AP8',
       'AP_AVG', 'CP', 'C9', 'ISN', 'F10.7_OBS', 'F10.7_ADJ',
       'F10.7_DATA_TYPE', 'F10.7_OBS_CENTER81', 'F10.7_OBS_LAST81',
       'F10.7_ADJ_CENTER81', 'F10.7_ADJ_LAST81'],
      dtype='object')

In [28]:
df_sw_core = df_sw[['DATE', 'AP_AVG', 'ISN', 'F10.7_ADJ']]

In [29]:
df_sw_core['DATE'] = pd.to_datetime(df_sw_core['DATE'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw_core['DATE'] = pd.to_datetime(df_sw_core['DATE'])


In [30]:
df_sw_core = df_sw_core.loc[df_sw_core['DATE'] > '1993-12-31']
df_sw_core = df_sw_core.loc[df_sw_core['DATE'] < '2025-1-1']



In [31]:
df_sw_core = df_sw_core.sort_values(by = 'DATE', ascending= True)
df_sw_core = df_sw_core.reset_index(drop = True)

In [32]:
df_sw_core

Unnamed: 0,DATE,AP_AVG,ISN,F10.7_ADJ
0,1994-01-01,26.0,126,143.4
1,1994-01-02,21.0,113,141.4
2,1994-01-03,15.0,136,128.7
3,1994-01-04,6.0,151,124.6
4,1994-01-05,4.0,153,132.4
...,...,...,...,...
11318,2024-12-27,2.0,257,250.0
11319,2024-12-28,3.0,254,251.8
11320,2024-12-29,4.0,233,246.3
11321,2024-12-30,6.0,218,216.1


#### Space weather 데이터 베이스 저장

In [87]:
import mysql.connector
import os
from dotenv import load_dotenv
from mysql.connector import Error

In [35]:
load_dotenv()
aws_db_ufo_pw = os.getenv('aws_db_ufo_pw')
aws_host_ufo = os.getenv('aws_host_ufo')


In [36]:
remote = mysql.connector.connect(
    host = aws_host_ufo,
    user = 'EDA',
    port = 3306,  # default 는 3306
    password = aws_db_ufo_pw,
    database = 'Frisbee'

)

cursor = remote.cursor()



In [None]:
cursor.close()

True

In [37]:
df_sw_core

Unnamed: 0,DATE,AP_AVG,ISN,F10.7_ADJ
0,1994-01-01,26.0,126,143.4
1,1994-01-02,21.0,113,141.4
2,1994-01-03,15.0,136,128.7
3,1994-01-04,6.0,151,124.6
4,1994-01-05,4.0,153,132.4
...,...,...,...,...
11318,2024-12-27,2.0,257,250.0
11319,2024-12-28,3.0,254,251.8
11320,2024-12-29,4.0,233,246.3
11321,2024-12-30,6.0,218,216.1


In [39]:
table_name = 'space_weather'

query_create_table = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            date DATETIME,
            ap_avg FLOAT,
            black_dot_cnt INT,
            f107_adj FLOAT
            )
        """
        
cursor.execute(query_create_table)
remote.commit()
print(f"테이블 '{table_name}'이(가) 성공적으로 생성되었거나 이미 존재합니다.")


테이블 'space_weather'이(가) 성공적으로 생성되었거나 이미 존재합니다.


In [41]:

for idx, row in df_sw_core.iterrows():

    query = f"""
    INSERT INTO {table_name} (date, ap_avg, black_dot_cnt, f107_adj)
    VALUES (%s,%s,%s,%s)
    """

    cursor.execute(query, tuple(row))
    
    
remote.commit()
print(f"{cursor.rowcount} 개의 ROW가 {table_name} 테이블에 성공적으로 삽입 되었습니다")

1 개의 ROW가 space_weather 테이블에 성공적으로 삽입 되었습니다


In [None]:
df_sw_core.to_csv('/home/roni/dev_ws/ufo_eda/data/sw_core.csv')

### 유성우 데이터 

#### preprocess

In [313]:
df_metheo = pd.read_csv('/home/roni/dev_ws/ufo_eda/data/metheo.csv')

In [314]:
df_metheo.head()

Unnamed: 0.1,Unnamed: 0,Address,Latitude,Longitude,Elevation,Local Date & Time,UT Date & Time,Duration,Moving direction,Descent Angle,...,First elevation,Last azimuth,Last elevation,Stellar Magnitude,Color,Observation,Remarks,Length,Name,Experience Level
0,0,"Leilani Estates, HI",19°,-154°,231m,1997-03-27 02:00 HST,1997-03-27 12:00 UT,-,From up left to down right,98°,...,31°,150.79°,13°,-16,White,No,"I was sound asleep and saw 2 white eggs, it wa...",-,Deborah P,2/5
1,1,"Claypool, IN",41°,-85°,272m,2011-07-01 21:30 EDT,2011-07-02 01:30 UT,-,From up left to down right,97°,...,35°,98.42°,29°,-20,White,Unknown,At the time I was running with a friend Katy B...,-,Curtis B,3/5
2,2,"Seguin, TX",29°,-97°,141m,2004-12-17 23:00 CDT,2004-12-18 05:00 UT,-,From up right to down left,181°,...,90°,81.76°,1°,-24,"Green, Light Green",No,It can from almost directly above me and lande...,-,Greg O,3/5
3,3,"Gulf Shores, AL",30°,-87°,5m,2004-10-12 02:14 CDT,2004-10-12 07:14 UT,-,From left to right,90°,...,32°,123.54°,20°,-13,"Orange, Yellow, Red",Yes,This was slow low alt. Massive meteor... actua...,-,Thomas M,5/5
4,4,"O'Fallon, IL",38°,-89°,173m,2007-02-04 18:30 CST,2007-02-05 00:30 UT,5s,From up left to down right,133°,...,49°,90°,1°,-13,Orange then Yellow,No,-,80°,Lawa M,3/5


In [315]:
df_metheo_cols = list(df_metheo.columns)
print(df_metheo_cols)

['Unnamed: 0', 'Address', 'Latitude', 'Longitude', 'Elevation', 'Local Date & Time', 'UT Date & Time', 'Duration', 'Moving direction', 'Descent Angle', 'Facing azimuth', 'First azimuth', 'First elevation', 'Last azimuth', 'Last elevation', 'Stellar Magnitude', 'Color', 'Observation', 'Remarks', 'Length', 'Name', 'Experience Level']


In [316]:
df_metheo_core = df_metheo[['Address', 'Latitude', 'Longitude', 'Elevation', 'Local Date & Time','Stellar Magnitude','Color']]

In [317]:
df_metheo_core

Unnamed: 0,Address,Latitude,Longitude,Elevation,Local Date & Time,Stellar Magnitude,Color
0,"Leilani Estates, HI",19°,-154°,231m,1997-03-27 02:00 HST,-16,White
1,"Claypool, IN",41°,-85°,272m,2011-07-01 21:30 EDT,-20,White
2,"Seguin, TX",29°,-97°,141m,2004-12-17 23:00 CDT,-24,"Green, Light Green"
3,"Gulf Shores, AL",30°,-87°,5m,2004-10-12 02:14 CDT,-13,"Orange, Yellow, Red"
4,"O'Fallon, IL",38°,-89°,173m,2007-02-04 18:30 CST,-13,Orange then Yellow
...,...,...,...,...,...,...,...
11003,"Elgin, IL",42°,-88°,246m,2014-08-23 21:00 CDT,-24,"Green, White"
11004,"niles, IL",42°,-87°,-,2014-08-23 21:15 CDT,-13,green
11005,"Bolingbrook, IL",41°,-88°,-,2014-08-23 21:10 CDT,-24,BLUE GREEN
11006,"Evanston, IL",42°,-87°,184m,2014-08-23 21:15 CDT,-10,"Orange, Red"


In [318]:
df_metheo_core['Elevation'].value_counts()

Elevation
-        3514
3m         83
2m         69
5m         65
1m         57
         ... 
2195m       1
1950m       1
1672m       1
654m        1
2175m       1
Name: count, Length: 968, dtype: int64

In [319]:
state_list = []
for addr in df_metheo_core['Address']:
     state_list.append(addr.split(',')[-1])
     
df_metheo_core['state'] = state_list
     
     
df_metheo_core['Latitude'] = df_metheo_core['Latitude'].str.replace('°','')
df_metheo_core['Longitude'] = df_metheo_core['Longitude'].str.replace('°','')
     
df_metheo_core['Elevation'] = df_metheo_core['Elevation'].str.replace('m','')
     
     


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core['state'] = state_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core['Latitude'] = df_metheo_core['Latitude'].str.replace('°','')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core['Longitude'] = df_metheo_core['Longitude'].str.replace('°','')
A value is try

In [320]:
df_metheo_core

Unnamed: 0,Address,Latitude,Longitude,Elevation,Local Date & Time,Stellar Magnitude,Color,state
0,"Leilani Estates, HI",19,-154,231,1997-03-27 02:00 HST,-16,White,HI
1,"Claypool, IN",41,-85,272,2011-07-01 21:30 EDT,-20,White,IN
2,"Seguin, TX",29,-97,141,2004-12-17 23:00 CDT,-24,"Green, Light Green",TX
3,"Gulf Shores, AL",30,-87,5,2004-10-12 02:14 CDT,-13,"Orange, Yellow, Red",AL
4,"O'Fallon, IL",38,-89,173,2007-02-04 18:30 CST,-13,Orange then Yellow,IL
...,...,...,...,...,...,...,...,...
11003,"Elgin, IL",42,-88,246,2014-08-23 21:00 CDT,-24,"Green, White",IL
11004,"niles, IL",42,-87,-,2014-08-23 21:15 CDT,-13,green,IL
11005,"Bolingbrook, IL",41,-88,-,2014-08-23 21:10 CDT,-24,BLUE GREEN,IL
11006,"Evanston, IL",42,-87,184,2014-08-23 21:15 CDT,-10,"Orange, Red",IL


In [321]:
color_list = ['white', 'red', 'blue', 'green', 'purple','orange', 'yellow']
safe_color_series = df_metheo_core['Color'].astype(str).str.lower()

In [322]:
for color in color_list:
    
    mask = safe_color_series.str.contains(color)
    
    df_metheo_core[color] = mask

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core[color] = mask
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core[color] = mask
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core[color] = mask
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value ins

In [323]:
df_metheo_core

Unnamed: 0,Address,Latitude,Longitude,Elevation,Local Date & Time,Stellar Magnitude,Color,state,white,red,blue,green,purple,orange,yellow
0,"Leilani Estates, HI",19,-154,231,1997-03-27 02:00 HST,-16,White,HI,True,False,False,False,False,False,False
1,"Claypool, IN",41,-85,272,2011-07-01 21:30 EDT,-20,White,IN,True,False,False,False,False,False,False
2,"Seguin, TX",29,-97,141,2004-12-17 23:00 CDT,-24,"Green, Light Green",TX,False,False,False,True,False,False,False
3,"Gulf Shores, AL",30,-87,5,2004-10-12 02:14 CDT,-13,"Orange, Yellow, Red",AL,False,True,False,False,False,True,True
4,"O'Fallon, IL",38,-89,173,2007-02-04 18:30 CST,-13,Orange then Yellow,IL,False,False,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11003,"Elgin, IL",42,-88,246,2014-08-23 21:00 CDT,-24,"Green, White",IL,True,False,False,True,False,False,False
11004,"niles, IL",42,-87,-,2014-08-23 21:15 CDT,-13,green,IL,False,False,False,True,False,False,False
11005,"Bolingbrook, IL",41,-88,-,2014-08-23 21:10 CDT,-24,BLUE GREEN,IL,False,False,True,True,False,False,False
11006,"Evanston, IL",42,-87,184,2014-08-23 21:15 CDT,-10,"Orange, Red",IL,False,True,False,False,False,True,False


In [324]:
date_list = []
time_list = []

for dt in df_metheo_core['Local Date & Time']:
    date_time_splitted = dt.split(' ')
    date = date_time_splitted[0]
    time = date_time_splitted[1]
    
    date_list.append(date)
    time_list.append(time)
    
    
df_metheo_core['date'] = date_list
df_metheo_core['time'] = time_list

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core['date'] = date_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core['time'] = time_list


In [325]:
df_metheo_core['date'] = pd.to_datetime(df_metheo_core['date'])
df_metheo_core.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_metheo_core['date'] = pd.to_datetime(df_metheo_core['date'])


Address                      object
Latitude                     object
Longitude                    object
Elevation                    object
Local Date & Time            object
Stellar Magnitude            object
Color                        object
state                        object
white                          bool
red                            bool
blue                           bool
green                          bool
purple                         bool
orange                         bool
yellow                         bool
date                 datetime64[ns]
time                         object
dtype: object

In [326]:
df_metheo_core = df_metheo_core[['date', 'time','Address', 'state','Latitude', 'Longitude', 'Elevation', 'Stellar Magnitude', 'white', 'red', 'blue', 'green', 'purple', 'orange', 'yellow']]

In [327]:
df_metheo_core = df_metheo_core.sort_values(by = 'date', ascending = True).reset_index(drop=True)
df_metheo_core


Unnamed: 0,date,time,Address,state,Latitude,Longitude,Elevation,Stellar Magnitude,white,red,blue,green,purple,orange,yellow
0,1995-06-08,02:00,"Trafalgar, IN",IN,39,-86,267,-13,False,False,False,True,False,False,False
1,1995-06-20,17:00,"Munhall, PA",PA,40,-79,329,-13,False,True,False,False,False,True,False
2,1995-07-04,21:30,"Myrtle, MS",MS,34,-89,108,-19,False,True,False,False,False,True,True
3,1995-08-01,16:00,"Pottstown, PA",PA,40,-75,46,-16,False,True,False,False,False,False,False
4,1995-12-15,06:50,"Eagan, MN",MN,44,-93,257,-15,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11003,2014-12-31,22:40,", NY",NY,40,-73,28,-14,False,False,False,False,False,True,False
11004,2014-12-31,21:30,"Willow Hill, IL",IL,39,-87,151,-18,True,False,False,False,False,False,False
11005,2014-12-31,22:34,"fishers, IN",IN,39,-86,-,-11,True,False,False,False,False,False,False
11006,2014-12-31,05:34,"Denver, CO",CO,39,-105,-,-6,True,False,False,False,False,False,False


In [328]:
df_metheo_core['state'] = df_metheo_core['state'].str.replace(' ','')

In [329]:
df_metheo_core = df_metheo_core.loc[df_metheo_core['Elevation'] != '-']

In [330]:
df_metheo_core = df_metheo_core.reset_index(drop=True)

In [331]:
df_metheo_core

Unnamed: 0,date,time,Address,state,Latitude,Longitude,Elevation,Stellar Magnitude,white,red,blue,green,purple,orange,yellow
0,1995-06-08,02:00,"Trafalgar, IN",IN,39,-86,267,-13,False,False,False,True,False,False,False
1,1995-06-20,17:00,"Munhall, PA",PA,40,-79,329,-13,False,True,False,False,False,True,False
2,1995-07-04,21:30,"Myrtle, MS",MS,34,-89,108,-19,False,True,False,False,False,True,True
3,1995-08-01,16:00,"Pottstown, PA",PA,40,-75,46,-16,False,True,False,False,False,False,False
4,1995-12-15,06:50,"Eagan, MN",MN,44,-93,257,-15,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7489,2014-12-31,02:08,"Depoe Bay, OR",OR,44,-124,12,-6,True,False,False,False,False,False,False
7490,2014-12-31,22:20,", OH",OH,39,-83,309,-11,False,False,False,False,False,False,True
7491,2014-12-31,22:40,", NY",NY,40,-73,28,-14,False,False,False,False,False,True,False
7492,2014-12-31,21:30,"Willow Hill, IL",IL,39,-87,151,-18,True,False,False,False,False,False,False


#### 데이터 베이스 저장

In [332]:
import mysql.connector
import os
from dotenv import load_dotenv
from mysql.connector import Error

In [333]:
load_dotenv()
aws_db_ufo_pw = os.getenv('aws_db_ufo_pw')
aws_host_ufo = os.getenv('aws_host_ufo')


In [334]:
remote = mysql.connector.connect(
    host = aws_host_ufo,
    user = 'EDA',
    port = 3306,  # default 는 3306
    password = aws_db_ufo_pw,
    database = 'Frisbee'

)

cursor = remote.cursor()



In [293]:
cursor.close()

True

In [335]:
df_metheo_core

Unnamed: 0,date,time,Address,state,Latitude,Longitude,Elevation,Stellar Magnitude,white,red,blue,green,purple,orange,yellow
0,1995-06-08,02:00,"Trafalgar, IN",IN,39,-86,267,-13,False,False,False,True,False,False,False
1,1995-06-20,17:00,"Munhall, PA",PA,40,-79,329,-13,False,True,False,False,False,True,False
2,1995-07-04,21:30,"Myrtle, MS",MS,34,-89,108,-19,False,True,False,False,False,True,True
3,1995-08-01,16:00,"Pottstown, PA",PA,40,-75,46,-16,False,True,False,False,False,False,False
4,1995-12-15,06:50,"Eagan, MN",MN,44,-93,257,-15,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7489,2014-12-31,02:08,"Depoe Bay, OR",OR,44,-124,12,-6,True,False,False,False,False,False,False
7490,2014-12-31,22:20,", OH",OH,39,-83,309,-11,False,False,False,False,False,False,True
7491,2014-12-31,22:40,", NY",NY,40,-73,28,-14,False,False,False,False,False,True,False
7492,2014-12-31,21:30,"Willow Hill, IL",IL,39,-87,151,-18,True,False,False,False,False,False,False


In [336]:
table_name = 'metheo'

query_create_table = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            date DATETIME,
            time TIME,
            address CHAR(25),
            state CHAR(4),
            latitude INT,
            longitude INT,
            elevation INT,
            magnitude INT,
            white BOOL,
            red BOOL,
            blue BOOL,
            green BOOL,
            purple BOOL,
            orange BOOL,
            yellow BOOL
            )
        """
        
        
cursor.execute(query_create_table)
remote.commit()
print(f"테이블 '{table_name}'이(가) 성공적으로 생성되었거나 이미 존재합니다.")


테이블 'metheo'이(가) 성공적으로 생성되었거나 이미 존재합니다.


In [337]:

for idx, row in df_metheo_core.iterrows():

    query = f"""
    INSERT INTO {table_name} (date, time, address, state, latitude, longitude, elevation, magnitude,white,red, blue, green, purple, orange, yellow)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """

    cursor.execute(query, tuple(row))
    
    
remote.commit()
print(f"{cursor.rowcount} 개의 ROW가 {table_name} 테이블에 성공적으로 삽입 되었습니다")

1 개의 ROW가 metheo 테이블에 성공적으로 삽입 되었습니다


In [338]:
df_sw_core.to_csv('/home/roni/dev_ws/ufo_eda/data/metheo_core.csv')