In [None]:
import os
import time
from io import StringIO

import boto3
import pandas as pd

In [None]:
# --- Configuration -----------------------------------------------------------
# Credentials come from the environment so they never have to be pasted into
# the notebook. An unset (or empty) variable yields None, which makes boto3
# fall back to its default credential chain instead of sending empty keys.
AWS_ACCESS_KEY=os.environ.get('AWS_ACCESS_KEY_ID') or None
AWS_SECRET_KEY=os.environ.get('AWS_SECRET_ACCESS_KEY') or None
AWS_REGION='ap-south-1'
SCHEMA_NAME='covid'
S3_BUCKET_NAME='athena-outputqueries'
S3_OUTPUT_DIRECTORY='output'
# The download helper fetches "<S3_OUTPUT_DIRECTORY>/<QueryExecutionId>.csv"
# from S3_BUCKET_NAME, so the Athena output location has to be that same prefix.
S3_STAGING_DIR=f's3://{S3_BUCKET_NAME}/{S3_OUTPUT_DIRECTORY}/'

In [None]:
# Athena API client for the configured region, authenticated with the
# key pair from the configuration cell.
athena_client = boto3.client(
    service_name="athena",
    region_name=AWS_REGION,
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
)

In [None]:
Dict={}  # stand-in object so the `Dict` annotation below resolves without importing typing
def download_and_load_query_results(
    client: boto3.client,
    query_response: Dict,
    poll_interval: float = 1.0,
) -> pd.DataFrame:
    """Wait for an Athena query to finish, then load its CSV result from S3.

    Args:
        client: Athena client used to poll the query.
        query_response: Response of ``start_query_execution``; only its
            ``'QueryExecutionId'`` entry is read.
        poll_interval: Seconds to sleep between polls while the query is
            still running.

    Returns:
        The query result parsed by ``pd.read_csv``.

    Raises:
        Exception: Any error raised by ``get_query_results`` whose message
            does not contain "not yet finished" is re-raised unchanged.

    Side effects:
        Downloads the result to ``athena_query_results.csv`` in the current
        working directory (overwriting any previous copy).
    """
    query_id = query_response['QueryExecutionId']

    # Poll until Athena stops reporting the query as unfinished. Only the
    # success/failure of the call matters here, so ask for a single row
    # instead of a full page of results on every attempt.
    while True:
        try:
            client.get_query_results(QueryExecutionId=query_id, MaxResults=1)
            break
        except Exception as err:
            if "not yet finished" not in str(err):
                raise
            # Back off between polls; hammering the API risks throttling
            # errors, which would be re-raised by the branch above.
            time.sleep(poll_interval)

    temp_file_location: str = "athena_query_results.csv"
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY,
        aws_secret_access_key=AWS_SECRET_KEY,
        region_name=AWS_REGION,
    )
    # The result object is expected at <S3_OUTPUT_DIRECTORY>/<query id>.csv.
    s3_client.download_file(
        S3_BUCKET_NAME,
        f"{S3_OUTPUT_DIRECTORY}/{query_id}.csv",
        temp_file_location,
    )

    return pd.read_csv(temp_file_location)

In [None]:
# Submit the query to Athena. The response carries the 'QueryExecutionId'
# that the download helper uses to wait for and locate the result.
response = athena_client.start_query_execution(
    QueryString="SELECT * FROM covid_data;",
    QueryExecutionContext={"Database": SCHEMA_NAME},
    ResultConfiguration={
        "EncryptionConfiguration": {"EncryptionOption": "SSE_S3"},
        "OutputLocation": S3_STAGING_DIR,
    },
)

In [None]:
# Display the raw start_query_execution response.
response

In [None]:
# Wait for the query to finish and load its CSV result into a DataFrame.
covidData = download_and_load_query_results(
    client=athena_client,
    query_response=response,
)

In [None]:
# Preview the first rows of the query result.
covidData.head()

In [None]:
# Order the rows by the 'updatedon' column.
# NOTE(review): 'updatedon' is declared TEXT in the generated schema further
# down, so this is presumably a string sort rather than chronological - confirm.
covidData = covidData.sort_values('updatedon')

In [None]:
# Per-state mean of every numeric column. numeric_only=True skips text
# columns such as 'updatedon', which would otherwise make mean() raise on
# recent pandas versions.
covidData.groupby('state').mean(numeric_only=True)


In [None]:
# Renumber the rows 0..n-1 after the sort. drop=True discards the old index
# directly instead of materialising it as an 'index' column and dropping it
# in a second cell, so this cell can be re-run safely.
covidData=covidData.reset_index(drop=True)

In [None]:
# Display the re-indexed DataFrame.
covidData

In [None]:

# Preview the first rows again after the re-index.
covidData.head()

In [None]:
# Copy of the data ordered by state with a fresh 0..n-1 index; covidData
# itself is not modified. drop=True avoids creating and then dropping a
# throwaway 'index' column.
sortedcovidData=covidData.sort_values(by='state').reset_index(drop=True)
sortedcovidData

In [None]:
# Destination bucket for the exported CSV. It must be the bucket the Redshift
# COPY statement reads from (s3://aws-covd-project/output/covid19Data.csv).
bucket='aws-covd-project'

In [None]:
# In-memory text buffer that will receive the CSV export.
csv_buffer=StringIO()

In [None]:
# Show the buffer object itself (its repr, not the text it holds).
csv_buffer

In [None]:
# Start from an empty buffer so re-running this cell does not append a
# second copy of the data.
csv_buffer.seek(0)
csv_buffer.truncate()
# index=False: the Redshift table has no column for the DataFrame index, so
# writing it would shift every field one column to the right during COPY.
covidData.to_csv(csv_buffer, index=False)

In [None]:
# S3 resource used to upload the exported CSV. The region is passed
# explicitly, like for the other AWS clients in this notebook, rather than
# relying on whatever default region the environment happens to provide.
s3_resource = boto3.resource(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
    region_name=AWS_REGION,
)


In [None]:
# Upload the CSV text held in csv_buffer as output/covid19Data.csv in `bucket`.
s3_resource.Object(bucket_name=bucket, key='output/covid19Data.csv').put(
    Body=csv_buffer.getvalue(),
)

In [None]:
# Generate a CREATE TABLE statement for the DataFrame's columns and print it.
covidDatasql = pd.io.sql.get_schema(frame=covidData, name='covidData')
print(covidDatasql)

In [None]:
import redshift_connector
import boto3

In [None]:
# Connection to the Redshift 'dev' database. Host and password are read from
# the environment (REDSHIFT_HOST / REDSHIFT_PASSWORD) so they do not have to
# be written into the notebook; when unset they fall back to '' as before.
conn = redshift_connector.connect(
    host=os.environ.get('REDSHIFT_HOST', ''),
    database='dev',
    user='awsuser',
    password=os.environ.get('REDSHIFT_PASSWORD', ''),
    port=5439,
)

In [None]:
# Turn on autocommit for the connection, so the statements executed below
# are not left waiting for an explicit conn.commit().
conn.autocommit=True

In [None]:
# Cursor for the statements below. The type is given as an annotation only;
# a chained assignment here would rebind redshift_connector.Cursor (the
# library's class) to this cursor instance.
cursor: redshift_connector.Cursor = conn.cursor()

In [None]:
# Create the target table: two TEXT columns ('updatedon', 'state') followed
# by 22 REAL columns, 24 in total.
# NOTE(review): the column list presumably comes from the get_schema output
# printed earlier - confirm it still matches the DataFrame.
# NOTE(review): plain CREATE TABLE presumably fails when the table already
# exists, so this cell is not re-runnable without dropping it first - confirm.
# NOTE(review): REAL is presumably single precision on Redshift; large dose
# counts may lose digits - consider DOUBLE PRECISION or BIGINT.
cursor.execute("""
CREATE TABLE "covidData" (
"updatedon" TEXT,
  "state" TEXT,
  "totaldosesadministered" REAL,
  "sessions" REAL,
  "sites" REAL,
  "firstdoseadministered" REAL,
  "seconddoseadministered" REAL,
  "male(dosesadministered)" REAL,
  "female(dosesadministered)" REAL,
  "transgender(dosesadministered)" REAL,
  "covaxin(dosesadministered)" REAL,
  "covishield(dosesadministered)" REAL,
  "sputnikv(dosesadministered)" REAL,
  "aefi" REAL,
  "18-44years(dosesadministered)" REAL,
  "45-60years(dosesadministered)" REAL,
  "60+years(dosesadministered)" REAL,
  "18-44years(individualsvaccinated)" REAL,
  "45-60years(individualsvaccinated)" REAL,
  "60+years(individualsvaccinated)" REAL,
  "male(individualsvaccinated)" REAL,
  "female(individualsvaccinated)" REAL,
  "transgender(individualsvaccinated)" REAL,
  "totalindividualsvaccinated" REAL
)
""")

In [None]:
# Bulk-load the exported CSV from S3 into the covidData table: comma
# delimited, first line skipped as the header, bucket region ap-south-1,
# access granted through the IAM role named in the credentials clause.
# NOTE(review): with a bare `delimiter ','` quoted fields that contain commas
# are presumably not parsed as CSV - confirm the data has none or switch to
# the CSV format option.
# NOTE(review): re-running this cell presumably appends the same rows again.
cursor.execute("""
copy covidData from 's3://aws-covd-project/output/covid19Data.csv'
credentials 'aws_iam_role=arn:aws:iam::228520749505:role/redshift-s3-access'
delimiter ','
region 'ap-south-1'
IGNOREHEADER 1
"""
)