In [5]:
# import packages
import datetime as dt
import os

import fastparquet
import pandas as pd
import snscrape.modules.twitter as sntwitter
from dateutil.relativedelta import relativedelta
from tqdm.notebook import tqdm

In [6]:
# create a function to get tweets
def get_tweets(concepto, start_date, end_date, location, max_tweets):
	"""Scrape tweets matching a search term and return them as a DataFrame.

	Builds a search query from the arguments, iterates over the items
	yielded by ``sntwitter.TwitterSearchScraper`` and keeps a fixed set of
	tweet and author fields for each one.

	Args:
		concepto: Search term placed at the start of the query.
		start_date: String used as the value of the ``since:`` operator.
		end_date: String used as the value of the ``until:`` operator.
		location: String used as the value of the ``location:`` operator.
		max_tweets: Maximum number of tweets to collect; also passed to the
			progress bar as its total.

	Returns:
		pandas.DataFrame with one row per collected tweet and the columns
		listed in ``columns`` below. When nothing is collected the frame is
		empty but still carries those columns, so callers can index them.
	"""
	columns = [
		'id', 'date', 'rawContent', 'replyCount', 'retweetCount',
		'likeCount', 'quoteCount', 'lang', 'place', 'hashtags',
		'mentionedUsers', 'user_id', 'user_name',
		'user_renderedDescription', 'user_join_date', 'user_followers',
		'user_location', 'user_verified', 'inReplyToTweetId',
	]

	# Join with single spaces so every operator is its own token; plain
	# concatenation previously glued 'location:' onto the end date
	# ('until:2022-08-01location:chile').
	# NOTE(review): 'location:' is reproduced from the original query; confirm
	# the search backend honours it (geo filters are usually spelled 'near:'
	# or 'place:').
	query = ' '.join([
		concepto,
		'since:' + start_date,
		'until:' + end_date,
		'location:' + location,
		'-filter:retweets',
	])
	scrap = sntwitter.TwitterSearchScraper(query)

	tweets_list = []

	if max_tweets > 0:
		for tweet in tqdm(scrap.get_items(), total=max_tweets):

			tweets_list.append({
				'id': tweet.id,
				'date': tweet.date,
				'rawContent': tweet.rawContent,
				'replyCount': tweet.replyCount,
				'retweetCount': tweet.retweetCount,
				'likeCount': tweet.likeCount,
				'quoteCount': tweet.quoteCount,
				'lang': tweet.lang,
				'place': tweet.place,
				'hashtags': tweet.hashtags,
				'mentionedUsers': tweet.mentionedUsers,
				'user_id': tweet.user.id,
				'user_name': tweet.user.username,
				'user_renderedDescription': tweet.user.renderedDescription,
				'user_join_date': tweet.user.created,
				'user_followers': tweet.user.followersCount,
				'user_location': tweet.user.location,
				'user_verified': tweet.user.verified,
				# Stored as text, so a missing parent id becomes the string 'None'.
				'inReplyToTweetId': str(tweet.inReplyToTweetId)
			})

			# Stop exactly at the cap, before asking the scraper for another
			# item (the old `i > max_tweets` check let two extra tweets in).
			if len(tweets_list) >= max_tweets:
				break

	# Passing `columns` keeps the schema stable even when no tweet was found.
	df = pd.DataFrame(tweets_list, columns=columns)
	return df

In [45]:
# create a function to get tweets by months
def tweet_rollout(conceptos, months, years):
	"""Scrape one month of tweets per (year, month, concepto) and save each as CSV.

	For every combination, calls ``get_tweets`` with a range that starts on
	the first day of the month and ends on the first day of the following
	month, with location 'chile' and a cap of 150_000 tweets. The result has
	a few column dtypes normalised and is written to
	``datasets/<year>/<concepto>_<year>_<month>.csv``. A summary line is
	printed for every combination.

	Args:
		conceptos: Iterable of search terms.
		months: Iterable of month numbers (anything ``str()`` renders as a
			month that ``strptime('%m')`` accepts).
		years: Iterable of years, rendered with ``str()`` the same way.

	Returns:
		None. Output is the CSV files and the printed summary lines.
	"""
	for year in years:
		# to_csv does not create missing directories, so make sure the
		# per-year folder exists before the first write.
		out_dir = 'datasets/' + str(year)
		os.makedirs(out_dir, exist_ok=True)

		for month in months:
			# Parse once and format both bounds with strftime so `since:` and
			# `until:` are zero-padded alike ('2022-07-01', not '2022-7-01').
			month_start = dt.datetime.strptime(str(year) + '-' + str(month) + '-01', '%Y-%m-%d')
			init_date = month_start.strftime('%Y-%m-%d')
			final_date = (month_start + relativedelta(months=1)).strftime('%Y-%m-%d')

			for concepto in conceptos:
				dataset = get_tweets(concepto, init_date, final_date, 'chile', 150_000)

				# An empty scrape may come back without any columns; skip the
				# dtype work and the file instead of failing on column access.
				if not dataset.empty:
					dataset['date'] = pd.to_datetime(dataset['date'], utc=True)
					dataset = dataset.astype({
						'replyCount': 'int32',
						'retweetCount': 'int32',
						'likeCount': 'int32',
						'quoteCount': 'int32',
						'user_verified': 'bool',
					})
					# File name keeps the month exactly as given (unpadded)
					# so it matches files written by earlier runs.
					dataset.to_csv(out_dir + '/' + concepto + '_' + str(year) + '_' + str(month) + '.csv', index=False)

				print('año: ' + str(year) + ', mes: ' + str(month) + ', ' + concepto + ': ' + str(len(dataset)) + ' tweets')

In [None]:
# Scrape July and August 2022 for a single search term.
# Only 'Nueva_Constitucion' is active here; the remaining terms that were
# previously listed in this cell are 'Convencion_Constitucional',
# 'Proceso_Constituyente' and 'Convencion_Constituyente'. Add them to
# `conceptos` to scrape them for the same months.
conceptos = ['Nueva_Constitucion']
months = [7, 8]
years = [2022]

tweet_rollout(conceptos=conceptos, months=months, years=years)

In [None]:
# Scrape 2022-09-01 .. 2022-09-04 for every search term.
# The date range is a fixed partial month, so this cell calls get_tweets
# directly instead of going through tweet_rollout.
years = [2022]

months = [9]

conceptos=['Convencion_Constitucional',
	'Proceso_Constituyente',
	'Nueva_Constitucion',
	'Convencion_Constituyente']

# NOTE: `years` and `months` only feed the output path and the printed summary;
# the scraped range below is hard-coded. Adding entries to either list would
# re-scrape the same dates and save them under a different file name.
for year in years:
	# to_csv does not create missing directories.
	out_dir = 'datasets/' + str(year)
	os.makedirs(out_dir, exist_ok=True)

	for month in months:
		for concepto in conceptos:
			dataset = get_tweets(concepto, '2022-09-01', '2022-09-04', 'chile', 150_000)

			# An empty scrape may come back without any columns; skip the
			# dtype work and the file instead of failing on column access.
			if not dataset.empty:
				dataset['date'] = pd.to_datetime(dataset['date'], utc=True)
				dataset = dataset.astype({
					'replyCount': 'int32',
					'retweetCount': 'int32',
					'likeCount': 'int32',
					'quoteCount': 'int32',
					'user_verified': 'bool',
				})
				dataset.to_csv(out_dir + '/' + concepto + '_' + str(year) + '_' + str(month) + '.csv', index=False)

			print('año: ' + str(year) + ', mes: ' + str(month) + ', ' + concepto + ': ' + str(len(dataset)) + ' tweets')