-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_automation.py
65 lines (50 loc) · 2.05 KB
/
data_automation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 25 14:59:58 2022
@author: humab
"""
import json
from pathlib import Path

import pandas as pd
# Input file: parsed below as one JSON object per line.
DATASET_PATH = 'News_Category_Dataset_v2.json'

# Upper bound on the number of headline files written for each category.
MAX_PER_CATEGORY = 500

# Category label as it appears in the 'category' column
# -> (output directory, file-name prefix).
CATEGORY_TARGETS = {
    'POLITICS': ('politics', 'p'),
    'WELLNESS': ('wellness', 'w'),
    'ENTERTAINMENT': ('entertainment', 'e'),
    'TRAVEL': ('travel', 't'),
    'SPORTS': ('sports', 's'),
    'BUSINESS': ('business', 'b'),
}


def _load_records(path):
    """Parse a line-delimited JSON file into a list of decoded objects.

    Blank lines are skipped. The file is decoded as UTF-8 explicitly so the
    result does not depend on the platform's default encoding (the output
    files below are written as UTF-8 as well), and it is closed as soon as
    parsing finishes.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as source:
        return [json.loads(line) for line in source if line.strip()]


def _export_headlines(frame, targets, limit):
    """Write the first *limit* headlines of each target category to disk.

    Rows are visited in frame order. For a row whose 'category' is a key of
    *targets*, its 'headline' is written to
    ``<directory>/<prefix><k>.txt`` where ``k`` counts from 1 within that
    category. Rows of other categories, and rows past a category's quota,
    are ignored.

    Args:
        frame: DataFrame with 'category' and 'headline' columns.
        targets: mapping of category label -> (directory, file prefix).
        limit: maximum number of files to write per category.

    Returns:
        dict mapping each category label to the number of files written.

    Raises:
        KeyError: if *frame* lacks a 'category' or 'headline' column.
    """
    written = dict.fromkeys(targets, 0)
    if limit <= 0 or not targets:
        return written

    # Create the output folders up front so a missing folder is not a crash.
    for folder, _prefix in targets.values():
        Path(folder).mkdir(parents=True, exist_ok=True)

    unfilled = len(targets)  # categories that have not reached the quota yet
    # Iterating the two columns directly avoids building a Series per row.
    for category, headline in zip(frame['category'], frame['headline']):
        count = written.get(category)
        if count is None or count >= limit:
            continue

        folder, prefix = targets[category]
        count += 1
        target = Path(folder) / (prefix + str(count) + '.txt')
        # The context manager closes every output file, even if write() fails.
        with open(target, 'w', encoding='utf-8') as out:
            out.write(headline)
        written[category] = count

        if count == limit:
            unfilled -= 1
            if unfilled == 0:
                # Every quota is full; the remaining rows cannot produce output.
                break
    return written


data_dict = _load_records(DATASET_PATH)
df = pd.DataFrame(data_dict)

print(df)
print(df.category[0])

_export_headlines(df, CATEGORY_TARGETS, MAX_PER_CATEGORY)