-
Notifications
You must be signed in to change notification settings - Fork 3
/
temp_code.py
182 lines (132 loc) · 6 KB
/
temp_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import bsds.functions_combined_BEST as ji
def search_for_tweets_with_word(twitter_df, word, display_n=10, from_column='content', ascending=False,
                                as_md=True):
    """Find the tweets whose `from_column` text contains `word` (case-insensitive).

    - `twitter_df`
        - DataFrame of tweets. Rows are ordered by its index, and the index
          becomes the first column of the result via `reset_index`.
        - The Markdown path passes the result to `df_to_md`, which reads the
          timestamps from a column named 'date'; the index is therefore
          expected to be a datetime index named 'date' when `as_md` is True.
    - `word`
        - Substring to look for. Both sides are lower-cased before comparing.
    - `display_n`
        - Maximum number of tweets rendered when `as_md` is True.
    - `from_column`
        - Name of the column that holds the tweet text.
    - `ascending`
        - False (default) lists the most recent tweets first, True the oldest.
    - `as_md`
        - If True (default): return a Markdown string made of a header line
          plus the output of `df_to_md` for the first `display_n` matches.
        - If False: return every match as a DataFrame (index reset).
    """
    needle = word.lower()
    tweets = twitter_df[[from_column]]

    # Non-string cells (NaN, numbers) can never contain the word; treating them
    # as "no match" keeps one missing tweet from aborting the whole search.
    contains_word = tweets[from_column].map(
        lambda text: isinstance(text, str) and needle in text.lower()
    ).astype(bool)
    found_words = int(contains_word.sum())

    # Chain the non-inplace variants: mutating a `.loc` selection in place is
    # what triggers pandas' SettingWithCopy warnings.
    res_df = (
        tweets.loc[contains_word.to_numpy()]
        .sort_index(ascending=ascending)
        .reset_index()
    )

    if not as_md:
        return res_df

    df_to_show = res_df.iloc[:display_n]
    # Report how many tweets are actually listed, which may be fewer than display_n.
    header = f'### Showing {df_to_show.shape[0]} of {found_words} instances of "{word}"\n'
    # Forward from_column so searches on a column other than 'content' render too.
    md_tweets = df_to_md(df_to_show, from_column=from_column)
    return header + md_tweets
def search_for_tweets_by_date(twitter_df, date, display_n=10, from_column='content', ascending=False,
                              as_md=True):
    """Select the tweets whose index matches `date`.

    - `twitter_df`
        - DataFrame of tweets; `date` is looked up in its index with `.loc`.
        - The Markdown path passes the result to `df_to_md`, which reads the
          timestamps from a column named 'date'; the index is therefore
          expected to be a datetime index named 'date' when `as_md` is True.
    - `date`
        - Index label handed to `.loc` (for a datetime index this may be a
          partial date string such as '2019-01-02').
    - `display_n`
        - Maximum number of tweets rendered when `as_md` is True.
    - `from_column`
        - Name of the column that holds the tweet text.
    - `ascending`
        - False (default) lists the most recent tweets first, True the oldest.
    - `as_md`
        - If True (default): return a Markdown string made of a header line
          plus the output of `df_to_md` for the first `display_n` tweets.
        - If False: return every matching tweet as a DataFrame (index reset).

    A `date` with no matching rows yields an empty result (and the
    "No Tweets found" header) instead of a KeyError.
    """
    import pandas as pd

    tweets = twitter_df[[from_column]]

    try:
        res_df = tweets.loc[date]
    except KeyError:
        # `.loc` raises when nothing matches the label; an empty frame lets the
        # "No Tweets found" branch below actually be reached.
        res_df = tweets.iloc[0:0]

    if isinstance(res_df, pd.Series):
        # An exact, unique timestamp makes `.loc` return the row as a Series;
        # re-select it as a one-row frame so sorting/resetting works as usual.
        res_df = tweets.loc[tweets.index == res_df.name]

    # Non-inplace variants: never mutate a selection of the caller's data.
    res_df = res_df.sort_index(ascending=ascending).reset_index()

    if not as_md:
        return res_df

    df_to_show = res_df.iloc[:display_n]
    n_shown = df_to_show.shape[0]
    if n_shown > 0:
        # Report how many tweets are actually listed, which may be fewer than display_n.
        header = f'### Showing {n_shown} Tweets from "{date}"\n'
        md_tweets = df_to_md(df_to_show, from_column=from_column)
    else:
        header = f'### No Tweets found for "{date}"\n'
        md_tweets = ''
    return header + md_tweets
def df_to_md(res_df, show=False, from_column='content'):
    """Render tweets as a Markdown bullet list, one bullet per row of `res_df`.

    - `res_df`
        - DataFrame with a 'date' column of datetime-like values (anything
          with `.strftime`) and a `from_column` column holding the tweet text.
          It is only read, never modified.
    - `show`
        - If True, also display the rendered Markdown through IPython.
    - `from_column`
        - Name of the column that holds the tweet text.

    Returns the Markdown text; an empty frame gives an empty string.
    """
    # '%T' is a platform-specific shortcut for '%H:%M:%S'; spelling it out
    # produces the same text and works wherever strftime does.
    date_format = '%m/%d/%Y - %H:%M:%S'

    bullets = [
        f'* ***Tweet from {stamp.strftime(date_format)}:***\n >{str(tweet)}'
        for stamp, tweet in zip(res_df['date'], res_df[from_column])
    ]
    display_text = '\n'.join(bullets)

    if show:
        # Imported lazily so that plain string rendering does not need IPython.
        from IPython.display import display, Markdown
        display(Markdown(display_text))
    return display_text
def search_for_tweets_prior_hour(twitter_df, stock_hour, from_column='content', ascending=False,
                                 as_md=True):
    """Select the tweets posted in the hour leading up to `stock_hour`.

    - `twitter_df`
        - DataFrame of tweets with a datetime index; the window
          [`stock_hour` - 1 hour, `stock_hour`] is sliced out of that index.
        - The Markdown path passes the result to `df_to_md`, which reads the
          timestamps from a column named 'date'; the index is therefore
          expected to be named 'date' when `as_md` is True.
    - `stock_hour`
        - End of the window: a Timestamp/datetime, or anything
          `pandas.to_datetime` can convert to one.
    - `from_column`
        - Name of the column that holds the tweet text.
    - `ascending`
        - False (default) lists the most recent tweets first, True the oldest.
    - `as_md`
        - If True (default): return a Markdown string made of a header line
          plus the output of `df_to_md`; an empty string when the window
          holds no tweets.
        - If False: return the tweets in the window as a DataFrame (index reset).
    """
    import pandas as pd

    # '%T' is a platform-specific shortcut for '%H:%M:%S'; spelling it out
    # produces the same text and works wherever strftime does.
    fmt = '%m/%d/%Y %H:%M:%S'

    tweets = twitter_df[[from_column]]
    # Label slicing is only well defined on a sorted index.
    if not tweets.index.is_monotonic_increasing:
        tweets = tweets.sort_index()

    # Window runs from one hour before stock_hour up to stock_hour itself.
    hr_ofst = pd.to_timedelta('-1 hour')
    window_end = pd.to_datetime(stock_hour)
    window_start = window_end + hr_ofst

    # The bounds are passed to pandas as strings, and reused in the header.
    idx_end = window_end.strftime(fmt)
    idx_start = window_start.strftime(fmt)

    # Non-inplace variants: never mutate a slice of the caller's data.
    res_df = (
        tweets.loc[idx_start:idx_end]
        .sort_index(ascending=ascending)
        .reset_index()
    )

    if not as_md:
        return res_df

    n_found = res_df.shape[0]
    if n_found == 0:
        # No header and no bullets when the hour is silent.
        return ''
    header = f'### Showing {n_found} Tweets from "{idx_start} to {idx_end}"\n'
    md_tweets = df_to_md(res_df, from_column=from_column)
    return header + md_tweets