In [1]:
import boto3
import pandas as pd
from io import StringIO
import time

In [2]:
AWS_ACCESS_KEY=''
AWS_SECRET_KEY=''
AWS_REGION='ap-south-1'
SCHEMA_NAME='covid'
S3_STAGING_DIR=''
S3_BUCKET_NAME='athena-outputqueries'
S3_OUTPUT_DIRECTORY='output'

In [3]:
athena_client=boto3.client(
"athena",
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
    region_name=AWS_REGION
)

In [4]:
Dict={}
def download_and_load_query_results(client: boto3.client, query_response: Dict) -> pd.DataFrame:
    while True:
        try:
            
            client.get_query_results(
            QueryExecutionId=query_response['QueryExecutionId']
            )
            break
        except Exception as err:
            if "not yet finished" in str(err):
                time.sleep(0.001)
            else:
                raise err
    temp_file_location: str= "athena_query_results.csv"
    s3_client=boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY,
        aws_secret_access_key=AWS_SECRET_KEY,
        region_name=AWS_REGION
        
    )
    s3_client.download_file(
    S3_BUCKET_NAME,
        f"{S3_OUTPUT_DIRECTORY}/{query_response['QueryExecutionId']}.csv",
        temp_file_location,
    )
    
    return pd.read_csv(temp_file_location)

In [5]:
response= athena_client.start_query_execution(
    QueryString="SELECT * FROM covid_data;",
    QueryExecutionContext={"Database": SCHEMA_NAME},
    ResultConfiguration={
        "OutputLocation": S3_STAGING_DIR,
        "EncryptionConfiguration":{"EncryptionOption":"SSE_S3"},
    },
)

In [6]:
response

{'QueryExecutionId': '1dfe6d21-9e5b-4794-a09f-da6d42f73597',
 'ResponseMetadata': {'RequestId': '4ba2368e-e8e8-461c-b7b3-25706231dc98',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Thu, 05 Jan 2023 07:42:29 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '59',
   'connection': 'keep-alive',
   'x-amzn-requestid': '4ba2368e-e8e8-461c-b7b3-25706231dc98'},
  'RetryAttempts': 0}}

In [7]:
covidData=download_and_load_query_results(athena_client, response)

In [8]:
covidData.head()

Unnamed: 0,updatedon,state,totaldosesadministered,sessions,sites,firstdoseadministered,seconddoseadministered,male(dosesadministered),female(dosesadministered),transgender(dosesadministered),...,18-44years(dosesadministered),45-60years(dosesadministered),60+years(dosesadministered),18-44years(individualsvaccinated),45-60years(individualsvaccinated),60+years(individualsvaccinated),male(individualsvaccinated),female(individualsvaccinated),transgender(individualsvaccinated),totalindividualsvaccinated
0,16/01/2021,India,48276.0,3455.0,2957.0,48276.0,0.0,,,,...,,,,,,,23757.0,24517.0,2.0,48276.0
1,17/01/2021,India,58604.0,8532.0,4954.0,58604.0,0.0,,,,...,,,,,,,27348.0,31252.0,4.0,58604.0
2,18/01/2021,India,99449.0,13611.0,6583.0,99449.0,0.0,,,,...,,,,,,,41361.0,58083.0,5.0,99449.0
3,19/01/2021,India,195525.0,17855.0,7951.0,195525.0,0.0,,,,...,,,,,,,81901.0,113613.0,11.0,195525.0
4,20/01/2021,India,251280.0,25472.0,10504.0,251280.0,0.0,,,,...,,,,,,,98111.0,153145.0,24.0,251280.0


In [9]:
covidData=covidData.sort_values(by='updatedon')

In [65]:
covidData.groupby(['state']).mean()


Unnamed: 0_level_0,totaldosesadministered,sessions,sites,firstdoseadministered,seconddoseadministered,male(dosesadministered),female(dosesadministered),transgender(dosesadministered),covaxin(dosesadministered),covishield(dosesadministered),...,18-44years(dosesadministered),45-60years(dosesadministered),60+years(dosesadministered),18-44years(individualsvaccinated),45-60years(individualsvaccinated),60+years(individualsvaccinated),male(individualsvaccinated),female(individualsvaccinated),transgender(individualsvaccinated),totalindividualsvaccinated
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Andaman and Nicobar Islands,99730.14,3550.427,20.990291,79737.16,19992.98,50746.36,43519.29,9.349515,0.0,99570.04,...,89313.33,103822.5,52919.07,12112.12,43332.05,22210.6,,,,50638.28
Andhra Pradesh,7726594.0,341934.9,1728.276699,5984761.0,1741833.0,3289657.0,3665310.0,1033.058252,1280074.0,6427122.0,...,5179065.0,8652103.0,5045893.0,1000233.0,2516865.0,1860175.0,,,,3528694.0
Arunachal Pradesh,295812.1,13727.95,112.961165,237888.3,57923.87,148620.4,119591.9,84.199029,8.834951,295148.2,...,428824.3,225995.0,88387.76,73866.51,88958.3,35309.51,,,,131759.7
Assam,3477617.0,171229.6,964.208738,2842720.0,634897.0,1727155.0,1473356.0,470.723301,576312.1,2898458.0,...,4427698.0,3152909.0,1541850.0,607897.3,1068489.0,606166.2,,,,1498557.0
Bihar,8452881.0,595688.4,1850.014563,7138363.0,1314517.0,4220362.0,3627255.0,1437.640777,761201.6,7682524.0,...,9546678.0,5885181.0,5351997.0,1436820.0,2234556.0,2622210.0,,,,4130299.0
Chandigarh,273285.6,11426.85,49.072816,217005.4,56280.27,141785.5,107608.2,32.033981,571.2524,272550.0,...,335570.3,213730.6,138747.1,52353.67,78351.3,57657.54,,,,123321.9
Chhattisgarh,4699628.0,588532.4,1977.893204,3864092.0,835535.9,2127015.0,2188191.0,469.121359,278201.8,4415312.0,...,3171658.0,4199207.0,2492610.0,413142.4,2270624.0,1467081.0,,,,2720683.0
Dadra and Nagar Haveli and Daman and Diu,186290.1,13825.05,53.965854,163878.4,22411.79,116041.9,61650.29,32.980488,5.229268,186208.9,...,384727.5,119662.4,40720.09,51577.48,41451.43,17548.24,,,,71437.74
Delhi,3944458.0,106523.3,606.42233,3030774.0,913684.2,2057803.0,1481321.0,720.92233,1061612.0,2874121.0,...,4409175.0,2969702.0,1827012.0,900501.7,1181362.0,815000.6,,,,1910858.0
Goa,447522.0,24100.79,73.932039,368890.1,78631.91,217705.7,197123.1,35.165049,7134.49,440073.1,...,557171.8,335350.8,267199.0,74404.68,120710.6,112904.0,,,,200717.4


In [11]:
covidData=covidData.reset_index()



In [12]:
covidData=covidData.drop(['index'], axis=1)

In [13]:
covidData

Unnamed: 0,updatedon,state,totaldosesadministered,sessions,sites,firstdoseadministered,seconddoseadministered,male(dosesadministered),female(dosesadministered),transgender(dosesadministered),...,18-44years(dosesadministered),45-60years(dosesadministered),60+years(dosesadministered),18-44years(individualsvaccinated),45-60years(individualsvaccinated),60+years(individualsvaccinated),male(individualsvaccinated),female(individualsvaccinated),transgender(individualsvaccinated),totalindividualsvaccinated
0,1/2/2021,West Bengal,238178.0,5304.0,759.0,238178.0,0.0,77261.0,160810.0,107.0,...,,,,,,,,,,238178.0
1,1/2/2021,Bihar,144845.0,4647.0,948.0,144845.0,0.0,39417.0,105337.0,91.0,...,,,,,,,,,,144845.0
2,1/2/2021,Dadra and Nagar Haveli and Daman and Diu,1038.0,35.0,19.0,1038.0,0.0,476.0,562.0,0.0,...,,,,,,,,,,1038.0
3,1/2/2021,Goa,3860.0,119.0,60.0,3860.0,0.0,1457.0,2403.0,0.0,...,,,,,,,,,,3860.0
4,1/2/2021,Puducherry,2316.0,100.0,14.0,2316.0,0.0,1087.0,1227.0,2.0,...,,,,,,,,,,2316.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7840,9/8/2021,Bihar,27834939.0,1151080.0,1096.0,23350171.0,4484768.0,14926420.0,12902990.0,5529.0,...,13984649.0,7520647.0,6329643.0,,,,,,,
7841,9/8/2021,Arunachal Pradesh,879094.0,31506.0,129.0,692475.0,186619.0,486874.0,391993.0,227.0,...,514163.0,263660.0,101271.0,,,,,,,
7842,9/8/2021,Himachal Pradesh,5632441.0,162941.0,701.0,4249849.0,1382592.0,2772475.0,2858812.0,1154.0,...,2216211.0,1970847.0,1445383.0,,,,,,,
7843,9/8/2021,Chhattisgarh,11769177.0,1390084.0,1027.0,9181482.0,2587695.0,5916437.0,5851349.0,1391.0,...,4389004.0,4705765.0,2674408.0,,,,,,,


In [22]:

covidData.head()

Unnamed: 0,updatedon,state,totaldosesadministered,sessions,sites,firstdoseadministered,seconddoseadministered,male(dosesadministered),female(dosesadministered),transgender(dosesadministered),...,18-44years(dosesadministered),45-60years(dosesadministered),60+years(dosesadministered),18-44years(individualsvaccinated),45-60years(individualsvaccinated),60+years(individualsvaccinated),male(individualsvaccinated),female(individualsvaccinated),transgender(individualsvaccinated),totalindividualsvaccinated
0,1/2/2021,West Bengal,238178.0,5304.0,759.0,238178.0,0.0,77261.0,160810.0,107.0,...,,,,,,,,,,238178.0
1,1/2/2021,Bihar,144845.0,4647.0,948.0,144845.0,0.0,39417.0,105337.0,91.0,...,,,,,,,,,,144845.0
2,1/2/2021,Dadra and Nagar Haveli and Daman and Diu,1038.0,35.0,19.0,1038.0,0.0,476.0,562.0,0.0,...,,,,,,,,,,1038.0
3,1/2/2021,Goa,3860.0,119.0,60.0,3860.0,0.0,1457.0,2403.0,0.0,...,,,,,,,,,,3860.0
4,1/2/2021,Puducherry,2316.0,100.0,14.0,2316.0,0.0,1087.0,1227.0,2.0,...,,,,,,,,,,2316.0


In [106]:
sortedcovidData=covidData.sort_values(by='state').reset_index().drop(['index'], axis=1)
sortedcovidData

Unnamed: 0,updatedon,state,totaldosesadministered,sessions,sites,firstdoseadministered,seconddoseadministered,male(dosesadministered),female(dosesadministered),transgender(dosesadministered),...,18-44years(dosesadministered),45-60years(dosesadministered),60+years(dosesadministered),18-44years(individualsvaccinated),45-60years(individualsvaccinated),60+years(individualsvaccinated),male(individualsvaccinated),female(individualsvaccinated),transgender(individualsvaccinated),totalindividualsvaccinated
0,16/05/2021,Andaman and Nicobar Islands,115188.0,2120.0,21.0,100058.0,15130.0,53903.0,46147.0,8.0,...,,,,10447.0,59293.0,30316.0,,,,100058.0
1,4/4/2021,Andaman and Nicobar Islands,23901.0,1200.0,12.0,18677.0,5224.0,10996.0,7681.0,0.0,...,,,,5165.0,8580.0,4930.0,,,,18677.0
2,25/07/2021,Andaman and Nicobar Islands,279839.0,2817.0,16.0,193281.0,86558.0,149816.0,129989.0,34.0,...,99170.0,120205.0,60464.0,,,,,,,
3,13/04/2021,Andaman and Nicobar Islands,48845.0,3400.0,32.0,43013.0,5832.0,23433.0,19575.0,5.0,...,,,,5287.0,23593.0,14131.0,,,,43013.0
4,24/03/2021,Andaman and Nicobar Islands,19693.0,1400.0,14.0,15247.0,4446.0,9159.0,6088.0,0.0,...,,,,4936.0,7062.0,3246.0,,,,15247.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7840,10/2/2021,West Bengal,396442.0,11924.0,973.0,396442.0,0.0,150028.0,246289.0,125.0,...,,,,,,,,,,396442.0
7841,16/02/2021,West Bengal,528075.0,15509.0,1045.0,521537.0,6538.0,228169.0,293240.0,128.0,...,,,,,,,,,,521537.0
7842,25/01/2021,West Bengal,87815.0,2713.0,632.0,87815.0,0.0,28568.0,59165.0,82.0,...,,,,,,,,,,87815.0
7843,15/03/2021,West Bengal,2353011.0,713800.0,3610.0,1958513.0,394498.0,1397096.0,955639.0,276.0,...,,,,,,,,,,2353011.0


In [14]:
bucket='aws-covd-project'

In [15]:
csv_buffer=StringIO()

In [16]:
csv_buffer

<_io.StringIO at 0x29669f60c10>

In [17]:
covidData.to_csv(csv_buffer)

In [18]:
s3_resource = boto3.resource('s3',
        aws_access_key_id=AWS_ACCESS_KEY,
         aws_secret_access_key= AWS_SECRET_KEY
                            )


In [None]:
s3_resource.Object(bucket, 'output/covid19Data.csv').put(Body=csv_buffer.getvalue())

In [19]:
covidDatasql=pd.io.sql.get_schema(covidData,'covidData')
print(''.join(covidDatasql))

CREATE TABLE "covidData" (
"updatedon" TEXT,
  "state" TEXT,
  "totaldosesadministered" REAL,
  "sessions" REAL,
  "sites" REAL,
  "firstdoseadministered" REAL,
  "seconddoseadministered" REAL,
  "male(dosesadministered)" REAL,
  "female(dosesadministered)" REAL,
  "transgender(dosesadministered)" REAL,
  "covaxin(dosesadministered)" REAL,
  "covishield(dosesadministered)" REAL,
  "sputnikv(dosesadministered)" REAL,
  "aefi" REAL,
  "18-44years(dosesadministered)" REAL,
  "45-60years(dosesadministered)" REAL,
  "60+years(dosesadministered)" REAL,
  "18-44years(individualsvaccinated)" REAL,
  "45-60years(individualsvaccinated)" REAL,
  "60+years(individualsvaccinated)" REAL,
  "male(individualsvaccinated)" REAL,
  "female(individualsvaccinated)" REAL,
  "transgender(individualsvaccinated)" REAL,
  "totalindividualsvaccinated" REAL
)


In [41]:
import redshift_connector
import boto3

In [53]:
conn =redshift_connector.connect(
    host='',
    database='dev',
    user='awsuser',
    password='',
    port=5439,
    
 )

# import redshift_connector

# # Connects to Redshift cluster using IAM credentials from default profile defined in ~/.aws/credentials
# conn = redshift_connector.connect(
#     iam=True,
#     database='dev',
#     user='AwsRed',
#     db_user='awsuser',
#     password='',
#     cluster_identifier='my-first-cluster',
#     access_key_id="",
#     secret_access_key="",
#     region="ap-south-1"
#  )

InterfaceError: ('communication error', gaierror(11001, 'getaddrinfo failed'))

In [52]:
conn.autocommit=True

NameError: name 'conn' is not defined

In [None]:
cursor=redshift_connector.Cursor = conn.cursor()

In [None]:
cursor.execute("""
CREATE TABLE "covidData" (
"updatedon" TEXT,
  "state" TEXT,
  "totaldosesadministered" REAL,
  "sessions" REAL,
  "sites" REAL,
  "firstdoseadministered" REAL,
  "seconddoseadministered" REAL,
  "male(dosesadministered)" REAL,
  "female(dosesadministered)" REAL,
  "transgender(dosesadministered)" REAL,
  "covaxin(dosesadministered)" REAL,
  "covishield(dosesadministered)" REAL,
  "sputnikv(dosesadministered)" REAL,
  "aefi" REAL,
  "18-44years(dosesadministered)" REAL,
  "45-60years(dosesadministered)" REAL,
  "60+years(dosesadministered)" REAL,
  "18-44years(individualsvaccinated)" REAL,
  "45-60years(individualsvaccinated)" REAL,
  "60+years(individualsvaccinated)" REAL,
  "male(individualsvaccinated)" REAL,
  "female(individualsvaccinated)" REAL,
  "transgender(individualsvaccinated)" REAL,
  "totalindividualsvaccinated" REAL
)
""")

In [None]:
cursor.execute("""
copy covidData from 's3://aws-covd-project/output/covid19Data.csv'
credentials ''
delimiter ','
region 'ap-south-1'
IGNOREHEADER 1
"""
)