# Web Crawling of GDELT data from 2013 - 2015
GDELT makes all data from 2015 - present accesible via Google Big Query. For the data from 1979-2013 there is a masterfile available, that compresses the event data in that timeframe. For the time from 2013-2015 however the data for the single days has to be downloaded seperately from their website. This is what this notebook is doing.

Download one file and put it in the downloaded_files folder.

In [15]:
import os
import requests
import zipfile

# Get the current working directory
current_dir = os.getcwd()

# Create a folder to save downloaded files if it doesn't exist
folder_name = 'downloaded_files'
folder_path = os.path.join(current_dir, folder_name)
os.makedirs(folder_path, exist_ok=True)

# URL of the zip folder to download
url = 'http://data.gdeltproject.org/events/20230531.export.CSV.zip'

# Determine the file name from the URL
file_name = url.split('/')[-1]

# Specify the file path where the downloaded zip folder should be saved
zip_file_path = os.path.join(folder_path, file_name)

# Download the zip folder and save it to the specified location
response = requests.get(url)
with open(zip_file_path, 'wb') as file:
    file.write(response.content)

# Extract the contents of the zip folder
extract_folder_path = os.path.join(folder_path, file_name.split('.')[0])
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder_path)

# Get the file inside the extracted folder
files_in_extracted_folder = os.listdir(extract_folder_path)
if len(files_in_extracted_folder) > 0:
    file_to_save = os.path.join(extract_folder_path, files_in_extracted_folder[0])

    # Specify the file path where the extracted file should be saved
    saved_file_path = os.path.join(folder_path, files_in_extracted_folder[0])
    
    # Move the file to the desired location
    os.rename(file_to_save, saved_file_path)
    print(f"File downloaded and saved to: {saved_file_path}")
else:
    print("No files found inside the extracted folder.")

# Clean up the extracted folder
os.rmdir(extract_folder_path)
print(f"Zip folder downloaded and extracted to: {extract_folder_path}")

# Delete the downloaded zip folder
os.remove(zip_file_path)
print(f"Zip folder deleted: {zip_file_path}")

File downloaded and saved to: d:\Uni\4_Semester\Machine Learning Project\Data Collection\Code\project_submission_haiperformer\data_collection\downloaded_files\20230531.export.CSV
Zip folder downloaded and extracted to: d:\Uni\4_Semester\Machine Learning Project\Data Collection\Code\project_submission_haiperformer\data_collection\downloaded_files\20230531
Zip folder deleted: d:\Uni\4_Semester\Machine Learning Project\Data Collection\Code\project_submission_haiperformer\data_collection\downloaded_files\20230531.export.CSV.zip


In [20]:
import re

data = '''20141231.export.CSV.zip (8.2MB) (MD5: 83b18986ca3a7ca241e16e9fed5dfc91)
20141230.export.CSV.zip (8.9MB) (MD5: a2371c2cf6cb710fa9456347bbc5d798)
20141229.export.CSV.zip (8.9MB) (MD5: 0810ee688ea110241d5fa210f8ec6f4c)
20141228.export.CSV.zip (5.7MB) (MD5: cb1f0de077becadc3c0a14b724b11955)
20141227.export.CSV.zip (5.4MB) (MD5: 70703bcb87112042b5207085d112945b)
20141226.export.CSV.zip (6.4MB) (MD5: 24af54095fa0666a2ca08e771636b513)
20141225.export.CSV.zip (5.3MB) (MD5: f66ac1227fddd4ebce22cfa4dd0834fa)
20141224.export.CSV.zip (8.1MB) (MD5: 7ec159c8b24d4f34591bb6fec2c4154c)
20141223.export.CSV.zip (9.5MB) (MD5: 726042ea5473274570298b40dc75696d)
20141222.export.CSV.zip (9.8MB) (MD5: afb609a5a1b70411ac906b96bc828c42)
20141221.export.CSV.zip (6.5MB) (MD5: 8ab325848b75fbf6381406e071fec95d)
20141220.export.CSV.zip (6.5MB) (MD5: a53eb0ece719a8870da9e69df183e5cd)
20141219.export.CSV.zip (10.4MB) (MD5: dddadd1f7efbfa4b080085453b57cd1b)
20141218.export.CSV.zip (11.9MB) (MD5: 0c8e0da0adcbb19c0e0937f54058e37b)
20141217.export.CSV.zip (12.4MB) (MD5: 3c1b16578ce54e9415b54d9f9bfcacf3)
20141216.export.CSV.zip (11.6MB) (MD5: c19413217e8bc80a7b5745a097f6d7dd)
20141215.export.CSV.zip (11.2MB) (MD5: a914b2b8fa3e987adc9fe7f3817e5b53)
20141214.export.CSV.zip (6.6MB) (MD5: f4ec63be7fb8fd585ff8733b560fe812)
20141213.export.CSV.zip (6.5MB) (MD5: dcd3360b1e4a90610040276239c63e88)
20141212.export.CSV.zip (10.8MB) (MD5: a5204ecddf959bb5d3bc5c95f4445abb)
20141211.export.CSV.zip (8.4MB) (MD5: e1ad216af46f16403971aaf78ffa77c9)
20141210.export.CSV.zip (12.2MB) (MD5: ae7e8407eadf6a968552fc6de268fccc)
20141209.export.CSV.zip (12.0MB) (MD5: 065f5f81293961d7a4dbf92236d87b08)
20141208.export.CSV.zip (10.4MB) (MD5: c44500a27c2e0d81d5e6ebb0989b0766)
20141207.export.CSV.zip (5.6MB) (MD5: 9d2954200559f426b22ddb7bdef91594)
20141206.export.CSV.zip (7.5MB) (MD5: 8b029ce9fe895bac0898ed755a170d92)
20141205.export.CSV.zip (10.9MB) (MD5: 08011437abe35503cc6b84e8a1a2fde8)
20141204.export.CSV.zip (12.1MB) (MD5: 99cdf541ee0382c766b3549db58636fd)
20141203.export.CSV.zip (11.9MB) (MD5: a8d081c94bd86a2455fd7d4bd2352e95)
20141202.export.CSV.zip (11.0MB) (MD5: 2be92485bb3d402c786d18451f9fac1e)
20141201.export.CSV.zip (10.8MB) (MD5: d6050573a0db180def2bd25f4e4210ed)
20141130.export.CSV.zip (6.8MB) (MD5: 8bba432f2ee1cceb9b685e61061c5bb8)
20141129.export.CSV.zip (6.3MB) (MD5: 3505252a5849c2a2bb96662d7b5bf297)
20141128.export.CSV.zip (8.8MB) (MD5: ee09e2edd50004af91c12370759cda34)
20141127.export.CSV.zip (9.0MB) (MD5: 5c15640c626b531cdd0ced903870665b)
20141126.export.CSV.zip (11.6MB) (MD5: b1982492c03496226dcbb494d2db560c)
20141125.export.CSV.zip (12.0MB) (MD5: 3c166626782722390c5203bb5dbfe8c8)
20141124.export.CSV.zip (11.5MB) (MD5: a2e835c941ee744aa2258f64c386950a)
20141123.export.CSV.zip (7.4MB) (MD5: cdda21813293f2622b71f0c001b824b6)
20141122.export.CSV.zip (7.1MB) (MD5: 8f2fc8b77b44f3fb066ba26bc7c63843)
20141121.export.CSV.zip (11.8MB) (MD5: 5ed97887e1b13a34a7991b0e011a9211)
20141120.export.CSV.zip (13.7MB) (MD5: a2f855137eb38866ccfe9afb9dc32dbb)
20141119.export.CSV.zip (12.2MB) (MD5: 335fa5f40d7780c187f4d01cea80ee34)
20141118.export.CSV.zip (12.2MB) (MD5: 40751f943aaf02711914ecb65cc59879)
20141117.export.CSV.zip (11.5MB) (MD5: b782f59da6fcce1f92b13eff34f3513a)
20141116.export.CSV.zip (7.4MB) (MD5: a72e41e043f60f7d85fb8f2318dc8367)
20141115.export.CSV.zip (7.0MB) (MD5: 74d5085e860ce8504cfc0b20f2ba1732)
20141114.export.CSV.zip (11.5MB) (MD5: 010ad2cfa43037b6c9ff1fc55ea7e098)
20141113.export.CSV.zip (12.4MB) (MD5: 647e280bfdd2afae1b8df632e5570311)
20141112.export.CSV.zip (12.5MB) (MD5: 104667654b3d70dcc499a32d8b02ce4a)
20141111.export.CSV.zip (12.0MB) (MD5: a1d7c5156b1f9e8ea2c8ffd332322574)
20141110.export.CSV.zip (11.7MB) (MD5: 9726b95b86a60fa175f4a497eab78e4d)
20141109.export.CSV.zip (7.4MB) (MD5: d713e3e78170ea74f5e4e30eec94cd4b)
20141108.export.CSV.zip (7.4MB) (MD5: de01529b9e7aefe14a25072f412b1986)
20141107.export.CSV.zip (11.5MB) (MD5: 36c78e28d8f11e770f68185d8769a189)
20141106.export.CSV.zip (12.7MB) (MD5: 8aa4e8d00d913e4c768d55e31dec487b)
20141105.export.CSV.zip (13.3MB) (MD5: 148be704b5792fdb53203d913b1a16c9)
20141104.export.CSV.zip (11.8MB) (MD5: b7075f64798b751929f8571bd22d1ceb)
20141103.export.CSV.zip (11.3MB) (MD5: e898fa2d7a3f7c34d94f09e72cfdf33d)
20141102.export.CSV.zip (7.5MB) (MD5: 4d4f5398f07ff24ac2f4f5d58f7f193f)
20141101.export.CSV.zip (7.3MB) (MD5: d9c4d9e248d20cec3e955f72b10a04a0)
20141031.export.CSV.zip (11.2MB) (MD5: a0ecb5fd126f13dd36aabecb7dcede4d)
20141030.export.CSV.zip (12.4MB) (MD5: 325f6b6353dcd435d4ed1da9e8b172aa)
20141029.export.CSV.zip (12.7MB) (MD5: aff57396b1f537e53a28645770c0b916)
20141028.export.CSV.zip (12.2MB) (MD5: 5db11ef6d21de4b51c9d1e6228d1498b)
20141027.export.CSV.zip (11.6MB) (MD5: 46e7f5bcb97f8f0be0cc5e1e6b7d3f7c)
20141026.export.CSV.zip (7.1MB) (MD5: e59ac384d58cb1cc5e7f8325754114d3)
20141025.export.CSV.zip (7.1MB) (MD5: 4609aba8d819fe7559593479e09f91d0)
20141024.export.CSV.zip (11.6MB) (MD5: 825a0961a82296b570de4ff651f04aef)
20141023.export.CSV.zip (12.4MB) (MD5: cca4b73ab1eab1057c346cd0356c0824)
20141022.export.CSV.zip (12.8MB) (MD5: 49493835763c6d675fa8d28f4dbd8dd1)
20141021.export.CSV.zip (12.8MB) (MD5: 0b0141964bde43549c5df1d475cd4d8a)
20141020.export.CSV.zip (11.7MB) (MD5: c5392ab647a650fac277509ff53b62e9)
20141019.export.CSV.zip (7.3MB) (MD5: 3f91ce714851b42ce55d90c0e3c7d005)
20141018.export.CSV.zip (7.5MB) (MD5: f49ade743877911e2ff790909d3e05d3)
20141017.export.CSV.zip (11.7MB) (MD5: c596c5de367d18df73445d544fa2b08b)
20141016.export.CSV.zip (12.6MB) (MD5: 996ff5d882a4f8adbd5364ec41426a80)
20141015.export.CSV.zip (14.9MB) (MD5: 17dc88b8718ee74ebc9888700b34d317)
20141014.export.CSV.zip (12.8MB) (MD5: 50e930388b90d9da645dfc3b34a9a77f)
20141013.export.CSV.zip (10.7MB) (MD5: d849de4161165c134606eff517fdba3b)
20141012.export.CSV.zip (7.4MB) (MD5: 14b792a1edd3e362854b83612e2a4c42)
20141011.export.CSV.zip (7.1MB) (MD5: 9cd07faf1d0e6e0c3a4d9b72efec9097)
20141010.export.CSV.zip (11.6MB) (MD5: df0cde2fb73adb8caef0827b9ae1405a)
20141009.export.CSV.zip (12.9MB) (MD5: c2bb65b5bd1dda800315ba0c3433e62e)
20141008.export.CSV.zip (12.6MB) (MD5: 0ad5839a976a1540348e19d1771e4581)
20141007.export.CSV.zip (12.0MB) (MD5: 0d5d0a58f41ae3041861d5c8a30c4062)
20141006.export.CSV.zip (10.8MB) (MD5: 510a35ae162e1d2691356802e939d69a)
20141005.export.CSV.zip (7.0MB) (MD5: a78c41d792f58dd67ba7577dde2f6894)
20141004.export.CSV.zip (7.3MB) (MD5: 8f11efc78f6ae4d6ea56618ae0f9dff8)
20141003.export.CSV.zip (10.5MB) (MD5: bf31e2322574f27d6ed7d27aeccec783)
20141002.export.CSV.zip (7.6MB) (MD5: 8fa0b8e8345940e6d711366c670facf8)
20141001.export.CSV.zip (11.5MB) (MD5: 326e8ccec5a41c39d1c09c768393d539)
20140930.export.CSV.zip (11.4MB) (MD5: 8b9b103119437ea6cd57ea050dbf42ab)
20140929.export.CSV.zip (11.0MB) (MD5: a027d64fef92cae61e692a1ee1f7b2ee)
20140928.export.CSV.zip (6.6MB) (MD5: 6745d77783489aa43e40ef77dac94900)
20140927.export.CSV.zip (6.7MB) (MD5: 08f622948fdc79366a93ebed8965c0c2)
20140926.export.CSV.zip (11.0MB) (MD5: bd08eff347bed0a77e5190d74edc6544)
20140925.export.CSV.zip (12.1MB) (MD5: 7324b0741731e00a9fa0ee20a9cfd4d5)
20140924.export.CSV.zip (12.4MB) (MD5: 014f84b671da55287a97a7ff76150ffc)
20140923.export.CSV.zip (12.3MB) (MD5: d1ba72866d4f8d1b22e2978748b5e5b7)
20140922.export.CSV.zip (10.8MB) (MD5: 12616b7c06c7c6ab5306cea34ecccdf6)
20140921.export.CSV.zip (6.5MB) (MD5: 857ab8bdcca5ff058d3c90f95575b4f0)
20140920.export.CSV.zip (6.8MB) (MD5: d649bb81de2c127de624483a107941c5)
20140919.export.CSV.zip (10.8MB) (MD5: f5ad1d50878fbea34f637cb06fcea500)
20140918.export.CSV.zip (11.5MB) (MD5: 5ca684aa7ee950a5fe6b2e94f275110f)
20140917.export.CSV.zip (11.8MB) (MD5: fa4a08a99d2a8f989f424988453cc0dc)
20140916.export.CSV.zip (11.7MB) (MD5: 68b5ff5f2649d588231a3c0ecda9271c)
20140915.export.CSV.zip (11.2MB) (MD5: d653e10724b86e02328cd73e9fb36783)
20140914.export.CSV.zip (6.8MB) (MD5: 2477a35f899afca508a8c490e52faeb7)
20140913.export.CSV.zip (6.4MB) (MD5: a7f33b69bb937e011829753e05e1effd)
20140912.export.CSV.zip (11.3MB) (MD5: 0f254f2b04a434b7363693bfa49cb6fa)
20140911.export.CSV.zip (12.3MB) (MD5: de012f40dbf643ea688c7f4ef2b4dc6e)
20140910.export.CSV.zip (12.0MB) (MD5: c74734b32f3c3ccb10009183f242ba66)
20140909.export.CSV.zip (11.3MB) (MD5: 3f4fa33aef22979de68075b171a95666)
20140908.export.CSV.zip (10.5MB) (MD5: 5371231e3b4c309fc19c535c69baa144)
20140907.export.CSV.zip (6.4MB) (MD5: 56cb657808cb86836e684d84e2379eef)
20140906.export.CSV.zip (6.7MB) (MD5: d93b837f2f07da53c09378bcce2704a0)
20140905.export.CSV.zip (11.0MB) (MD5: 3f260a2651ca167b26b1299f0856872d)
20140904.export.CSV.zip (11.7MB) (MD5: f5b77cce2bf0b56e8f884cd9f6fb235b)
20140903.export.CSV.zip (12.2MB) (MD5: 5877e29056fbed2666c0ca5b834a0ddd)
20140902.export.CSV.zip (11.0MB) (MD5: 2ac59785e1b6aabbe8756c6005819d71)
20140901.export.CSV.zip (8.7MB) (MD5: a795eceb5d3538ccdbc3243dc7660826)
20140831.export.CSV.zip (6.7MB) (MD5: eff88fa050bc583137cae8f292ca8be9)
20140830.export.CSV.zip (6.9MB) (MD5: 6bcb736921c3b33ddba9da8164e9337a)
20140829.export.CSV.zip (9.6MB) (MD5: c07702d4e48f6e586c0c9f89d83abdc8)
20140828.export.CSV.zip (11.3MB) (MD5: 63146170190c59dceba4de9f7c370836)
20140827.export.CSV.zip (11.3MB) (MD5: 27475750d496eab0aa6537417ca741e2)
20140826.export.CSV.zip (10.9MB) (MD5: e93e24693d5a7013c4ce4bae1d70b3b6)
20140825.export.CSV.zip (10.2MB) (MD5: 65ca5bdd7a64dfaab4a4cbca0c72d166)
20140824.export.CSV.zip (6.8MB) (MD5: 2e7397711c1c80c61aa048c727256686)
20140823.export.CSV.zip (6.3MB) (MD5: 6b949a1bad8450687e5ba4070a34fb03)
20140822.export.CSV.zip (10.1MB) (MD5: a1dc86cede053a884a82e4f7a249a463)
20140821.export.CSV.zip (11.0MB) (MD5: 2504595707a4fd1f037503729b8f54a8)
20140820.export.CSV.zip (11.2MB) (MD5: cba242b054b24296a3cea1925fb18a66)
20140819.export.CSV.zip (10.6MB) (MD5: 58bebdee320cca811e99382566270859)
20140818.export.CSV.zip (10.1MB) (MD5: 1df80bb7a4baf4b42df8ae22efc477a5)
20140817.export.CSV.zip (6.2MB) (MD5: b1b5e2f5f4b722083c236041a7817a79)
20140816.export.CSV.zip (6.2MB) (MD5: 86dedbfb4fc640f00d29838f03ea65d3)
20140815.export.CSV.zip (9.8MB) (MD5: b2b3cc6d617b1b5bba4f24146736d074)
20140814.export.CSV.zip (10.8MB) (MD5: 231a7fb7486bc725f50ffd56bf376832)
20140813.export.CSV.zip (11.1MB) (MD5: 86cf6aa67bdceb8ab89ba37825840016)
20140812.export.CSV.zip (10.5MB) (MD5: 364d8f856d2cfa2d6bd40f6a9614531e)
20140811.export.CSV.zip (9.3MB) (MD5: 05f9e8ca62bda54f0b90498ea683a003)
20140810.export.CSV.zip (6.6MB) (MD5: b1ea901cea1f001c2d264d812b9e45fe)
20140809.export.CSV.zip (6.8MB) (MD5: 50cf7ae1bc43d03f586397cf18a0ab74)
20140808.export.CSV.zip (10.4MB) (MD5: 827583dd6f3493f4e4df89781ef28181)
20140807.export.CSV.zip (11.2MB) (MD5: 5c13532edb8aea48ea0ab92d08bd8663)
20140806.export.CSV.zip (11.1MB) (MD5: cbc411698078fbe26b0177002ee1bfda)
20140805.export.CSV.zip (10.9MB) (MD5: af471babdda0a9486fcb88a0698b9687)
20140804.export.CSV.zip (10.2MB) (MD5: 591161fd0437eb6c352d8939bf2f9f7c)
20140803.export.CSV.zip (6.4MB) (MD5: 67cd0b42fb7b490eceb6d6c84fc30890)
20140802.export.CSV.zip (6.4MB) (MD5: 0c6d5d924b4bff5896b1171e1847f5a8)
20140801.export.CSV.zip (9.9MB) (MD5: 780520f5da3b31baf60fbb49d13f7887)
20140731.export.CSV.zip (10.7MB) (MD5: 958a3826f7c47368e2341f4b20c90843)
20140730.export.CSV.zip (10.2MB) (MD5: 20698be8b2266232880ac5c630c62bac)
20140729.export.CSV.zip (10.3MB) (MD5: b8e65092d975a26fbdfa1c872578da41)
20140728.export.CSV.zip (9.9MB) (MD5: cf0dd430ff16567176772238620beeb6)
20140727.export.CSV.zip (6.4MB) (MD5: cd8d7df03d0c12d88b55d3838c58e2d6)
20140726.export.CSV.zip (6.9MB) (MD5: 6fc738910454b554828d44d30db540d9)
20140725.export.CSV.zip (11.5MB) (MD5: 714fc3cf03b4b1ffe8d5bd258ff01e2c)
20140724.export.CSV.zip (11.7MB) (MD5: 96e37a7d0266ff9d40aa4e2eef06e763)
20140723.export.CSV.zip (11.7MB) (MD5: 4e13e1f03fb6dea4fa9a8e05dc2cbeea)
20140722.export.CSV.zip (11.3MB) (MD5: 5a9f4ce5a9b11b5ea664882651820998)
20140721.export.CSV.zip (11.0MB) (MD5: 5d975d3a7ce85b7f4740888bc092f898)
20140720.export.CSV.zip (7.2MB) (MD5: a9621ed78a6a540f2ae9fd0930a43f8d)
20140719.export.CSV.zip (7.4MB) (MD5: 0afec5634c6a9dfec6c528839387283c)
20140718.export.CSV.zip (11.8MB) (MD5: 9e6fee0547c5d153ba1e7dbff895412a)
20140717.export.CSV.zip (11.7MB) (MD5: 62ed5a692cc731bda6b1fabec7748c6d)
20140716.export.CSV.zip (11.0MB) (MD5: aec5c0968774ec05fe155b5896eb2173)
20140715.export.CSV.zip (11.0MB) (MD5: c592842808ea84d4f056ac7a9c9dcfba)
20140714.export.CSV.zip (10.1MB) (MD5: 7f87d18a76d5e38a307fa68aeb20f7b3)
20140713.export.CSV.zip (6.4MB) (MD5: 995e072e7580e53392f03f081b646208)
20140712.export.CSV.zip (6.3MB) (MD5: 815941a2fdcf4b26af91ff074d3629a1)
20140711.export.CSV.zip (15.1MB) (MD5: eb3d05f91acccbc31ad03d3e52b69ff2)
20140710.export.CSV.zip (10.7MB) (MD5: 9a4f7bbe7cc695177ab4134fd732d580)
20140709.export.CSV.zip (9.9MB) (MD5: 7ee12ba5da38394fb64f6cc458a4264e)
20140708.export.CSV.zip (9.7MB) (MD5: a3ad8eaa9523ea0c061d44cf8c3f2d5c)
20140707.export.CSV.zip (10.6MB) (MD5: c25327524b356b1b1690f5079ab1b3cb)
20140706.export.CSV.zip (5.7MB) (MD5: 3f0b924d9826542303e7f2510e3b50ef)
20140705.export.CSV.zip (5.1MB) (MD5: eaae1db44e0cebcb141c23f2a1e841d7)
20140704.export.CSV.zip (7.5MB) (MD5: 93b4528fd5ca2ffed93f48aa389fb779)
20140703.export.CSV.zip (9.6MB) (MD5: e6295bb198659e73692f768fd7691deb)
20140702.export.CSV.zip (9.8MB) (MD5: 83e3c7f8a3cb199b320298da998f7ca0)
20140701.export.CSV.zip (9.4MB) (MD5: 22a6a8ce958a350c89d5ece5773a88e5)
20140630.export.CSV.zip (8.9MB) (MD5: 8322332cec91e3a40383e5e3964ece3d)
20140629.export.CSV.zip (5.6MB) (MD5: f2c8c06e003be1b8125db641b74172cb)
20140628.export.CSV.zip (5.6MB) (MD5: 79f103e30617d71c0c4a5a78f99c8ed1)
20140627.export.CSV.zip (9.5MB) (MD5: 7e2f2f2b3ef7fbb93c386282f0fdde4f)
20140626.export.CSV.zip (10.1MB) (MD5: eaa6269cd62834d5b10f4200dd7c4ced)
20140625.export.CSV.zip (10.5MB) (MD5: c0e59ae671d2444973e025ef761a2f8a)
20140624.export.CSV.zip (10.2MB) (MD5: 9c012086b3acbf9e8cacad6a5f4f6bf6)
20140623.export.CSV.zip (9.8MB) (MD5: 40e7f86e8e09606490081658d41e29f9)
20140622.export.CSV.zip (6.5MB) (MD5: 95a7fffe28aff5240ae988768f5e39e6)
20140621.export.CSV.zip (6.5MB) (MD5: 35914ec255daa0bbc89f75d34de6045f)
20140620.export.CSV.zip (9.8MB) (MD5: 090a5780ae64a1f98fb5177a37438c8b)
20140619.export.CSV.zip (10.2MB) (MD5: 6eb1943d6c71428d8fc4ae8af845df09)
20140618.export.CSV.zip (10.3MB) (MD5: e51a9cacb11b0d2f8c7178a082a009a2)
20140617.export.CSV.zip (10.1MB) (MD5: 8752c63be82b52429e9e6f499d62f56d)
20140616.export.CSV.zip (9.5MB) (MD5: f121026350192fbde4be9b5911fffe9d)
20140615.export.CSV.zip (5.8MB) (MD5: 62b2c948c55c28d7cdc882c7ee3f22a7)
20140614.export.CSV.zip (5.8MB) (MD5: 7f840397550011c0e51539504c95aea3)
20140613.export.CSV.zip (9.1MB) (MD5: f6b030702ea3829c81668986c30eeffa)
20140612.export.CSV.zip (9.6MB) (MD5: 23f7f2b5ba70b75ce6e5ca78d5102b5b)
20140611.export.CSV.zip (10.0MB) (MD5: 5766810385db9f3282f41bff73b22c82)
20140610.export.CSV.zip (10.0MB) (MD5: 00224eb4edfcac1c849ed0ea667800ba)
20140609.export.CSV.zip (8.9MB) (MD5: 952e231c22b1523a7ef842b8b564a5ac)
20140608.export.CSV.zip (5.7MB) (MD5: 55ccfb104da0ea22de5a7f32abec60ac)
20140607.export.CSV.zip (5.8MB) (MD5: f64313c33b09643d61ee1be1fc5406bf)
20140606.export.CSV.zip (8.1MB) (MD5: 64bfe5b3cccb4278f1a72f4537fbd0ae)
20140605.export.CSV.zip (10.5MB) (MD5: ea20999b7048fc88de88e39cbdaea799)
20140604.export.CSV.zip (10.6MB) (MD5: 02322729a695f6b835990b79baa25fc4)
20140603.export.CSV.zip (10.3MB) (MD5: 03d2e610b5b3be5bcd138c9c44fb1aaf)
20140602.export.CSV.zip (9.5MB) (MD5: d3b9a096e51f64806c4a1581d6539a8c)
20140601.export.CSV.zip (6.0MB) (MD5: a08d60ea29eb14dfe56415eb83224026)
20140531.export.CSV.zip (6.0MB) (MD5: e258f1e863f695faa9e276271548897a)
20140530.export.CSV.zip (9.3MB) (MD5: 5f19139d51c75e1b37c5d312f386d265)
20140529.export.CSV.zip (10.1MB) (MD5: b7353518c1df3da4891972efa0af770a)
20140528.export.CSV.zip (10.1MB) (MD5: 743b0327338b33909490b6994c109cd1)
20140527.export.CSV.zip (9.7MB) (MD5: ee9fc5fc3e01863123818e517ac034e1)
20140526.export.CSV.zip (7.7MB) (MD5: 4587112c05ed5a32ea7233fac8f18a27)
20140525.export.CSV.zip (6.5MB) (MD5: 8736bf275801ca08daab15a30614fc22)
20140524.export.CSV.zip (6.3MB) (MD5: b6c2057e91fddcce41fdf45edd451ecd)
20140523.export.CSV.zip (9.7MB) (MD5: e6ea6102b5858c9120d72b36f5dbc976)
20140522.export.CSV.zip (10.4MB) (MD5: 40993ae8d0187c2d285bb8eea013ed64)
20140521.export.CSV.zip (10.5MB) (MD5: c74d4651101ef9aa3782d3b6896653e7)
20140520.export.CSV.zip (10.4MB) (MD5: 50e61002d894aac3994116bd89b3f21c)
20140519.export.CSV.zip (9.6MB) (MD5: 1fe7b0e5b4393605809f2b1491b76234)
20140518.export.CSV.zip (5.7MB) (MD5: 246342f2c07ff81748c5ada337857eac)
20140517.export.CSV.zip (5.9MB) (MD5: 57708ae30d59e8eac53618c74218a4b5)
20140516.export.CSV.zip (9.5MB) (MD5: 669b66bb9b0cb5274f97b0b60659fcb7)
20140515.export.CSV.zip (10.4MB) (MD5: 7545cb6423342fbd953a1cfcd9ffe17f)
20140514.export.CSV.zip (10.1MB) (MD5: 2a1ac240939be4a9afdb4f3210ba8f66)
20140513.export.CSV.zip (9.9MB) (MD5: a0b176d0cf758ba21fc10c5f67a0a22f)
20140512.export.CSV.zip (9.6MB) (MD5: 4dec2e377ea6b8fc70e3d5d8a1954311)
20140511.export.CSV.zip (5.6MB) (MD5: b3fdf8000dc7623e9878565b508370ae)
20140510.export.CSV.zip (5.9MB) (MD5: 42fce440b03ca2ef8f52f78267947194)
20140509.export.CSV.zip (9.3MB) (MD5: 1f1077743410df4079390595bd249591)
20140508.export.CSV.zip (10.0MB) (MD5: 23d018188e602511aef30c427f0169e0)
20140507.export.CSV.zip (10.4MB) (MD5: d293f48d192efacaaaf7f6f9d80eb678)
20140506.export.CSV.zip (10.5MB) (MD5: 3fb772cea09c12c9daf654c1a0705ace)
20140505.export.CSV.zip (9.3MB) (MD5: d6b5263da651bd2d1823bfab9eed0a97)
20140504.export.CSV.zip (5.8MB) (MD5: 4e88e324da7bea67dbc5ecfde77898eb)
20140503.export.CSV.zip (6.0MB) (MD5: 9d36d8fb7e0f34e7d8b92fe1adde3d84)
20140502.export.CSV.zip (9.4MB) (MD5: d51133dbdaf1eb1bc1d613a8c52ad72f)
20140501.export.CSV.zip (9.6MB) (MD5: 1e362ec869f55c0be74b5dd834ff5015)
20140430.export.CSV.zip (10.2MB) (MD5: c60a70493162d9370ca6d9f5db6c4d18)
20140429.export.CSV.zip (10.4MB) (MD5: f97427b1d14986d4f68a8c1c818438bb)
20140428.export.CSV.zip (9.7MB) (MD5: aae50edd4082524177d3404547c47081)
20140427.export.CSV.zip (6.0MB) (MD5: 1a0d2a9d7c6fb89a22157e3da0dd7e9e)
20140426.export.CSV.zip (6.0MB) (MD5: 9c13dfd36a22862cdcf8d309305fc257)
20140425.export.CSV.zip (9.8MB) (MD5: b1c322a20da6bf8935b523799c7f0623)
20140424.export.CSV.zip (10.4MB) (MD5: e3ae0e6c69bfc1ba3630fe37aa57ddd0)
20140423.export.CSV.zip (11.1MB) (MD5: 25b066ba46409ee3873d2c3cf4c9bde1)
20140422.export.CSV.zip (10.3MB) (MD5: 80af927e336726d00a052d92576cc143)
20140421.export.CSV.zip (8.6MB) (MD5: afb979a6b03509f409b6c08fda28e1b4)
20140420.export.CSV.zip (5.6MB) (MD5: df6392e92b77cd39017b42f5831d50ca)
20140419.export.CSV.zip (5.6MB) (MD5: 41cf5067ce372a042b3c76e2b457dd75)
20140418.export.CSV.zip (8.5MB) (MD5: 2acac2216e91aebcea0ec9d08afbf461)
20140417.export.CSV.zip (10.9MB) (MD5: 3784ff892c0cb80a6df0c8b9d70f7378)
20140416.export.CSV.zip (10.4MB) (MD5: e10e0a98d5bfbfa523a2a87805b0a273)
20140415.export.CSV.zip (10.3MB) (MD5: 49394e689b830f7aa53d45f84793e9c8)
20140414.export.CSV.zip (9.8MB) (MD5: 7b2f702a9ba1bdac8a6eb94758c54ffb)
20140413.export.CSV.zip (6.0MB) (MD5: a516bdff7bb6ecf7f7a39f1ef467dafc)
20140412.export.CSV.zip (6.1MB) (MD5: 1e59c68f0cbfe97da5b37b3716009ff8)
20140411.export.CSV.zip (9.9MB) (MD5: 59e57bf9a34dd642e927501e8e88a815)
20140410.export.CSV.zip (11.0MB) (MD5: 0c10b92ee95d71621591aa167513cff3)
20140409.export.CSV.zip (11.2MB) (MD5: d32d0aaa73893cde50b1f6f3359063ec)
20140408.export.CSV.zip (7.8MB) (MD5: 032329fc597699ac4527a727e3d31e14)
20140407.export.CSV.zip (9.9MB) (MD5: 85a91cc791432fa234355d26bf5a9d9c)
20140406.export.CSV.zip (6.4MB) (MD5: 756570756dc6ea0ecb56de68d7c8faf9)
20140405.export.CSV.zip (7.3MB) (MD5: 56d3fb07934736a654cb15a9e105919b)
20140404.export.CSV.zip (6.0MB) (MD5: da90ab8346f79bb524b896829799d777)
20140403.export.CSV.zip (9.7MB) (MD5: b121e8bdb845ad67a673541d6117f1e9)
20140402.export.CSV.zip (9.4MB) (MD5: 3f333d883cecfb173d780ca5969fe882)
20140401.export.CSV.zip (9.8MB) (MD5: 4fecfe71e09695e9fee7a9eee1db9fed)
20140331.export.CSV.zip (9.6MB) (MD5: 3195ace6ae1093e47a1e2065ea45bb25)
20140330.export.CSV.zip (5.2MB) (MD5: 474348f6e77ea00dffeda21c24fec6dc)
20140329.export.CSV.zip (5.9MB) (MD5: 80273aece72548bf69fc5f1787412e80)
20140328.export.CSV.zip (9.7MB) (MD5: 6c553a756f2c45931a1247d2dc7522c0)
20140327.export.CSV.zip (10.5MB) (MD5: f0763181a74879f030aa2a71e3e92a48)
20140326.export.CSV.zip (10.7MB) (MD5: 8c39f389ad8aa6dec5ab08a309b76e53)
20140325.export.CSV.zip (10.8MB) (MD5: c17bba616e014e220ce72a72585bd16d)
20140324.export.CSV.zip (10.0MB) (MD5: a1fee20ad6d2805ae0d86ad133b07750)
20140323.export.CSV.zip (6.2MB) (MD5: fea6c09083c71a64848acfb9d82b6fe5)
20140322.export.CSV.zip (6.3MB) (MD5: 9a08960a9d2fe00afc6b1ff45c573c5a)
20140321.export.CSV.zip (9.9MB) (MD5: d492ca38db3c8f40b657b0eb2415f950)
20140320.export.CSV.zip (10.6MB) (MD5: 8602497fdc0f54861c056d33fb64f3b8)
20140318.export.CSV.zip (10.7MB) (MD5: cf0c2a30b09cdbc28204eb0eca53db1e)
20140317.export.CSV.zip (9.8MB) (MD5: 61e70e4ff79e590abddd6f26f8dfa552)
20140316.export.CSV.zip (6.4MB) (MD5: 7d6b48c5fb5393f5071ae90b6ddadf1c)
20140315.export.CSV.zip (6.4MB) (MD5: 934c49a95ffc3645bb14153ac448e135)
20140314.export.CSV.zip (6.2MB) (MD5: f605f7243700d90f64b86fa1f330816f)
20140313.export.CSV.zip (10.6MB) (MD5: 589585a360862da8e94cff5d18416f70)
20140312.export.CSV.zip (10.0MB) (MD5: 14511c7db7a8f78aa42b931934f04741)
20140311.export.CSV.zip (10.6MB) (MD5: 9cd356cf0b6414daa1c2edfafc7b42bc)
20140310.export.CSV.zip (10.0MB) (MD5: 817a6576a41e3f2517de1e25f6149a09)
20140309.export.CSV.zip (6.0MB) (MD5: 85ce2582c4cfdb9543a47f9c461ca75b)
20140308.export.CSV.zip (6.3MB) (MD5: 913859f02c764ea146cf6adb05fc31c9)
20140307.export.CSV.zip (9.7MB) (MD5: 5e0ab217c02a36a614f0a35e56ca839d)
20140306.export.CSV.zip (10.9MB) (MD5: 42297c7506492e8928f2a1cf38974a6e)
20140305.export.CSV.zip (10.9MB) (MD5: 1a511ef994cb8f741e63e05a1f6c17d1)
20140304.export.CSV.zip (10.5MB) (MD5: 67e8c19ed899eb0f6ac7df32243c5f22)
20140303.export.CSV.zip (10.2MB) (MD5: f9e5366e0ee2c3d0c0a2739708986015)
20140302.export.CSV.zip (2.0MB) (MD5: 44a0c45ef35ef46fdb31abe34935df7d)
20140301.export.CSV.zip (6.8MB) (MD5: 8c0e058c56aa99992b51c03df80199bf)
20140228.export.CSV.zip (10.4MB) (MD5: ad9eb62391c6f1b92f4e2cafaac275bb)
20140227.export.CSV.zip (3.0MB) (MD5: af7eaf197e2e00bdcd69887789945c81)
20140226.export.CSV.zip (11.3MB) (MD5: 9ff47a662bbc7ded8f94ee763ad8c2e1)
20140225.export.CSV.zip (11.1MB) (MD5: eea54c2463ef93610553273b63e5c90e)
20140224.export.CSV.zip (10.5MB) (MD5: 8a7191e3a671c45338e164a9aa8445f5)
20140223.export.CSV.zip (6.4MB) (MD5: ee2515104742d9162458e8e18be24f54)
20140222.export.CSV.zip (6.4MB) (MD5: e4aeef80f08ddd38ff9854e282ee14e9)
20140221.export.CSV.zip (10.3MB) (MD5: 392f2f7f615a3547a7debdc7fca3acaf)
20140220.export.CSV.zip (11.2MB) (MD5: b45859f9ee600c1c9e06ea8130d65138)
20140219.export.CSV.zip (11.7MB) (MD5: cc0f323fcfa9d6b141bb3641aaec40f8)
20140218.export.CSV.zip (10.6MB) (MD5: cd454e7d40d92bf9522c82acd835dc90)
20140217.export.CSV.zip (9.5MB) (MD5: 2893e5423c209fcefca87daf6c7fdc12)
20140216.export.CSV.zip (5.3MB) (MD5: 544edec2c95a7d8658da26d291ca3d6a)
20140215.export.CSV.zip (5.4MB) (MD5: 654d155bafe30c8b0b367e5419b4c564)
20140214.export.CSV.zip (9.2MB) (MD5: 218b9a16eb0f06a9d40edd16b5aede7d)
20140213.export.CSV.zip (9.9MB) (MD5: 0120cb97efe7bbfce4cdae711ed394a4)
20140212.export.CSV.zip (10.2MB) (MD5: f6a8ab170e2c72098ad5b5ba036dc4a0)
20140211.export.CSV.zip (10.2MB) (MD5: 087a2d51d5a2976b2d2d19e2b13cd3e6)
20140210.export.CSV.zip (9.2MB) (MD5: 4648b35c9d7271217d8b37f32d3c97b2)
20140209.export.CSV.zip (5.4MB) (MD5: 9a2da0756be26f473ea7e5271f3b781c)
20140208.export.CSV.zip (5.2MB) (MD5: 7bdd1179b80ba891ac093846dab584cf)
20140207.export.CSV.zip (8.9MB) (MD5: 8c51f05c6d65dfdfcaad301375370495)
20140206.export.CSV.zip (9.7MB) (MD5: 4be8892b01f80c203c2e5e7dd6d5511e)
20140205.export.CSV.zip (9.6MB) (MD5: 67bad98ef2de67679ca292179373f490)
20140204.export.CSV.zip (9.7MB) (MD5: 34c54a475de5d8b5f7f473d345e034f6)
20140203.export.CSV.zip (8.9MB) (MD5: a6e774acb21968439ef897cc7b910e33)
20140202.export.CSV.zip (5.6MB) (MD5: 699af51d41a1e8137f84759061243150)
20140201.export.CSV.zip (5.6MB) (MD5: 380834b6a47ec96fc081519253d50041)
20140131.export.CSV.zip (8.2MB) (MD5: 811001d9e7bd087c8f31599e6b6605be)
20140130.export.CSV.zip (6.4MB) (MD5: e9181baae14a9a682e60b44b65efcfce)
20140129.export.CSV.zip (10.3MB) (MD5: ec4a1dfb0b891a2718a55b69ec2a501d)
20140128.export.CSV.zip (11.1MB) (MD5: e6f78daf8ab95a3ab3082ca7194ba2fd)
20140127.export.CSV.zip (10.4MB) (MD5: 9d1f19f34780978191aa3ce70544da0f)
20140126.export.CSV.zip (5.6MB) (MD5: 5cc0a28c7bfba286d3f9af37ebbb22e3)
20140122.export.CSV.zip (2.5MB) (MD5: 6671f4f9e3abcfceb23128265d85b7c3)
20140121.export.CSV.zip (2.3MB) (MD5: cef14b4f1a9f7a6c3db079fa6e2be604)
20140120.export.CSV.zip (2.1MB) (MD5: 7a6ae58b147f1f0b04d5615573e3c4b8)
20140119.export.CSV.zip (1.3MB) (MD5: 425af2249fe2daf9345d7a864265c52a)
20140118.export.CSV.zip (1.4MB) (MD5: d074f0cdc5b8aa52edbe8a649af74f83)
20140117.export.CSV.zip (2.1MB) (MD5: 22dd976300cf0cc505a6a00e1c9d517a)
20140116.export.CSV.zip (10.6MB) (MD5: b515b8b2c97af834c91f5f4916084b8f)
20140115.export.CSV.zip (10.8MB) (MD5: 6508a8d954381dbcb901210c9ba8f57d)
20140114.export.CSV.zip (10.1MB) (MD5: 0cda0225f8a03966bfe5c45f1fe37de6)
20140113.export.CSV.zip (9.0MB) (MD5: 0fe0e9e4998d2b25d885ddd9ed4ebde0)
20140112.export.CSV.zip (6.0MB) (MD5: bc14583cbce4c31ec95e7f2821c69455)
20140111.export.CSV.zip (6.0MB) (MD5: 3e9d52d8904ec17e3ef13d92347264e3)
20140110.export.CSV.zip (9.2MB) (MD5: dbe02369d76cf20682f9f1518fe0c286)
20140109.export.CSV.zip (10.0MB) (MD5: 6d2f75b4f15bf402585c7a041822ed2d)
20140108.export.CSV.zip (9.8MB) (MD5: ec2d8a0df9092458032701c9af5b40d0)
20140107.export.CSV.zip (9.3MB) (MD5: 7aeeaea0720b183b6df1db92eca25773)
20140106.export.CSV.zip (8.7MB) (MD5: 16fa92feff9c8d5d0d39ad4295caa5bd)
20140105.export.CSV.zip (5.7MB) (MD5: 57f2c518334a7fdc2a1355a29df5ff62)
20140104.export.CSV.zip (4.8MB) (MD5: 7b9a271d1042fc6b62c8325a8378ce6e)
20140103.export.CSV.zip (7.5MB) (MD5: 1cbd9404727ccb0fe2fc8fed3d864bb0)
20140102.export.CSV.zip (8.0MB) (MD5: e479a766110f78699ce5b1dfd5680738)
20140101.export.CSV.zip (4.9MB) (MD5: f06897f32aaf209febe2e70c3871730d)'''

pattern = r'([^\s]+\.zip)'
endings = re.findall(pattern, data)

base_url = "http://data.gdeltproject.org/events/"
urls_2014 = [base_url + ending for ending in endings]

print(urls_2014)

['http://data.gdeltproject.org/events/20141231.export.CSV.zip', 'http://data.gdeltproject.org/events/20141230.export.CSV.zip', 'http://data.gdeltproject.org/events/20141229.export.CSV.zip', 'http://data.gdeltproject.org/events/20141228.export.CSV.zip', 'http://data.gdeltproject.org/events/20141227.export.CSV.zip', 'http://data.gdeltproject.org/events/20141226.export.CSV.zip', 'http://data.gdeltproject.org/events/20141225.export.CSV.zip', 'http://data.gdeltproject.org/events/20141224.export.CSV.zip', 'http://data.gdeltproject.org/events/20141223.export.CSV.zip', 'http://data.gdeltproject.org/events/20141222.export.CSV.zip', 'http://data.gdeltproject.org/events/20141221.export.CSV.zip', 'http://data.gdeltproject.org/events/20141220.export.CSV.zip', 'http://data.gdeltproject.org/events/20141219.export.CSV.zip', 'http://data.gdeltproject.org/events/20141218.export.CSV.zip', 'http://data.gdeltproject.org/events/20141217.export.CSV.zip', 'http://data.gdeltproject.org/events/20141216.export.C

In [None]:
for url in urls_2014:
    # Get the current working directory
    current_dir = os.getcwd()

    # Create a folder to save downloaded files if it doesn't exist
    folder_name = 'downloaded_files'
    folder_path = os.path.join(current_dir, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Determine the file name from the URL
    file_name = url.split('/')[-1]

    # Specify the file path where the downloaded zip folder should be saved
    zip_file_path = os.path.join(folder_path, file_name)

    # Download the zip folder and save it to the specified location
    response = requests.get(url)
    with open(zip_file_path, 'wb') as file:
        file.write(response.content)

    # Extract the contents of the zip folder
    extract_folder_path = os.path.join(folder_path, file_name.split('.')[0])
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_folder_path)

    # Get the file inside the extracted folder
    files_in_extracted_folder = os.listdir(extract_folder_path)
    if len(files_in_extracted_folder) > 0:
        file_to_save = os.path.join(extract_folder_path, files_in_extracted_folder[0])

        # Specify the file path where the extracted file should be saved
        saved_file_path = os.path.join(folder_path, files_in_extracted_folder[0])
        
        # Move the file to the desired location
        os.rename(file_to_save, saved_file_path)
        print(f"File downloaded and saved to: {saved_file_path}")
    else:
        print("No files found inside the extracted folder.")

    # Clean up the extracted folder
    os.rmdir(extract_folder_path)
    print(f"Zip folder downloaded and extracted to: {extract_folder_path}")

    # Delete the downloaded zip folder
    os.remove(zip_file_path)
    print(f"Zip folder deleted: {zip_file_path}")