# Tweet processing

Possible things to practice

- get tweet from queue
- clean tweet text
- use cognitive services to label tweet with sentiment
- save tweet to table storage

Load libraries

In [1]:
#r "nuget:NetEscapades.Configuration.Yaml"
#r "nuget:Azure.Storage.Queues"

Installed package NetEscapades.Configuration.Yaml version 2.1.0

Installed package Azure.Storage.Queues version 12.5.0

In [1]:
using System;
using System.Threading.Tasks;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.IO;
using System.Collections.Generic;
using System.Linq;

## Clean sample tweet

In [1]:
/// <summary>
/// A span inside a tweet's text, described by a start and an end offset.
/// Implemented by the hashtag and URL markers so both can be processed uniformly.
/// </summary>
public interface ITextMarker
{
    /// <summary>Offset in the tweet text at which the marked span begins.</summary>
    int Start { get; set; }

    /// <summary>
    /// Offset in the tweet text at which the marked span ends. The cleaning code in this
    /// notebook resumes copying text at this offset, i.e. it treats the value as exclusive.
    /// </summary>
    int End { get; set; }
}

/// <summary>
/// Marks the span of a single hashtag inside a tweet's text.
/// </summary>
public class HashtagMarker : ITextMarker
{
    /// <summary>Offset in the tweet text at which the hashtag span begins.</summary>
    public int Start { get; set; }
    /// <summary>Offset in the tweet text at which the hashtag span ends.</summary>
    public int End { get; set; }
    /// <summary>
    /// The hashtag value. In the sample tweet shown in this notebook it is the tag
    /// text without the leading '#' (e.g. "AD2020" for "#AD2020").
    /// </summary>
    public string Tag { get; set; }
}

/// <summary>
/// Marks the span of a single URL inside a tweet's text, together with the
/// different representations of that URL.
/// </summary>
public class UrlMarker : ITextMarker
{
    /// <summary>
    /// Shortened, human-readable form of the link; in the sample tweet it is a
    /// truncated version of the expanded URL without the scheme.
    /// </summary>
    public string DisplayUrl { get; set; }
    /// <summary>Offset in the tweet text at which the URL span ends.</summary>
    public int End { get; set; }
    /// <summary>Full target address; in the sample tweet this is the complete blog post URL.</summary>
    public string ExpandedUrl { get; set; }
    /// <summary>Offset in the tweet text at which the URL span begins.</summary>
    public int Start { get; set; }
    /// <summary>The URL as it appears in the tweet text; in the sample tweet this is the t.co short link.</summary>
    public string Url { get; set; }
    /// <summary>
    /// NOTE(review): presumably the final address after following redirects; it is null
    /// in the sample tweet — confirm against the producer of the JSON.
    /// </summary>
    public string UnwoundUrl { get; set; }
}

/// <summary>
/// A tweet as deserialized from JSON in this notebook: its text plus the hashtag
/// and URL markers that locate those entities inside the text.
/// </summary>
public class Tweet
{
    /// <summary>
    /// NOTE(review): presumably the hashtag that was searched for when this tweet
    /// was collected — confirm against the producer of the JSON.
    /// </summary>
    public string SearchHashtag { get; set; }

    /// <summary>Identifier of the tweet.</summary>
    public string Id { get; set; }

    /// <summary>Identifier of the tweet's author.</summary>
    public string AuthorId { get; set; }

    /// <summary>Creation timestamp of the tweet.</summary>
    public DateTime CreatedAt { get; set; }

    /// <summary>
    /// Hashtag spans found in <see cref="Text"/>. Starts as an empty list so that a
    /// payload without this property yields an empty collection instead of null.
    /// </summary>
    public IList<HashtagMarker> Hashtags { get; set; } = new List<HashtagMarker>();

    /// <summary>
    /// URL spans found in <see cref="Text"/>. Starts as an empty list so that a
    /// payload without this property yields an empty collection instead of null.
    /// </summary>
    public IList<UrlMarker> Urls { get; set; } = new List<UrlMarker>();

    /// <summary>Raw tweet text, including hashtags and URLs.</summary>
    public string Text { get; set; }
}

In [1]:
// Match JSON property names to Tweet properties regardless of casing.
var options = new JsonSerializerOptions();
options.PropertyNameCaseInsensitive = true;

// Read the sample file (relative path) and bind it to a Tweet instance.
string sampleJson = File.ReadAllText("sampleTweet.json");
Tweet sampleTweet = JsonSerializer.Deserialize<Tweet>(sampleJson, options);

// Trailing expression without a semicolon: the notebook displays the tweet text.
sampleTweet.Text


Szał Sylwestrowej Mody czyli Propozycja Nie Do Odrzucenia #AD2020 #NowyRok #2021 #Sylwester #Szampan #Party #Kwarantanna #COVID19 #Pidżama #Domówka https://t.co/F3GCmMpfms

In [1]:
// Display the ordered hashtag/URL markers. The variable is defined in the cell that
// builds the cleaned text further down, so that cell must have been run first.
markersToRemove

index,type,Start,End,Tag,DisplayUrl,ExpandedUrl,Url,UnwoundUrl
0,Submission#19+HashtagMarker,58,65,AD2020,,,,
1,Submission#19+HashtagMarker,66,74,NowyRok,,,,
2,Submission#19+HashtagMarker,81,91,Sylwester,,,,
3,Submission#19+HashtagMarker,92,100,Szampan,,,,
4,Submission#19+HashtagMarker,101,107,Party,,,,
5,Submission#19+HashtagMarker,108,120,Kwarantanna,,,,
6,Submission#19+HashtagMarker,121,129,COVID19,,,,
7,Submission#19+HashtagMarker,130,138,Pidżama,,,,
8,Submission#19+HashtagMarker,139,147,Domówka,,,,
9,Submission#19+UrlMarker,148,171,,minddrone.wordpress.com/2020/12/30/sza…,https://minddrone.wordpress.com/2020/12/30/szal-sylwestrowej-mody-czyli-mistrz-i-malgorzata-ad2020-2021/,https://t.co/F3GCmMpfms,<null>


In [1]:
// Gather every hashtag and URL span of the sample tweet, ordered by where it starts,
// so the text can be copied in a single left-to-right pass that skips those spans.
var markersToRemove = sampleTweet.Hashtags.AsEnumerable<ITextMarker>()
    .Concat(sampleTweet.Urls)
    .OrderBy(m => m.Start);

var orginalText = sampleTweet.Text;
var builder = new StringBuilder(orginalText.Length);
var currentPostion = 0;
foreach (var marker in markersToRemove)
{
    // Copy the text between the previous marker and this one, then jump past the marker.
    builder.Append(orginalText[currentPostion..marker.Start]);
    currentPostion = marker.End;
}

// Keep whatever follows the last marker; without this, trailing text would be dropped.
builder.Append(orginalText[currentPostion..]);

// Trailing expression without a semicolon: the notebook displays the cleaned text.
builder.ToString()

Szał Sylwestrowej Mody czyli Propozycja Nie Do Odrzucenia   #2021        

In [1]:
/// <summary>
/// Returns the tweet's text with every span listed in <c>tweet.Hashtags</c> and
/// <c>tweet.Urls</c> removed. Text before, between and after the spans is kept
/// unchanged (including the whitespace that surrounded the removed spans).
/// </summary>
/// <param name="tweet">Tweet whose text should be cleaned.</param>
/// <returns>
/// The text without the marked spans; an empty string when the tweet has no text.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="tweet"/> is null.</exception>
public string CleanTweetText(Tweet tweet)
{
    if (tweet is null)
    {
        throw new ArgumentNullException(nameof(tweet));
    }

    var text = tweet.Text ?? string.Empty;

    // A tweet without hashtags or URLs may carry null collections; treat them as empty.
    var hashtags = tweet.Hashtags?.AsEnumerable<ITextMarker>() ?? Enumerable.Empty<ITextMarker>();
    var urls = tweet.Urls?.AsEnumerable<ITextMarker>() ?? Enumerable.Empty<ITextMarker>();
    var markersToRemove = hashtags
        .Concat(urls)
        .OrderBy(m => m.Start);

    // NOTE(review): offsets are applied as UTF-16 indices into the string. If the markers
    // come from a source that counts Unicode code points, text containing characters
    // outside the BMP (e.g. emoji) would be cut at the wrong place — confirm.
    var builder = new StringBuilder(text.Length);
    var currentPosition = 0;
    foreach (var marker in markersToRemove)
    {
        // Clamp the span so overlapping or out-of-range markers cannot move the cursor
        // backwards or past the end of the text.
        var start = Math.Clamp(marker.Start, currentPosition, text.Length);
        var end = Math.Clamp(marker.End, start, text.Length);

        builder.Append(text, currentPosition, start - currentPosition);
        currentPosition = end;
    }

    // Keep whatever follows the last marker; previously this tail was silently dropped.
    builder.Append(text, currentPosition, text.Length - currentPosition);

    return builder.ToString();
}

In [1]:
// Print the raw tweet text first, then the cleaned version, for a visual comparison.
Console.WriteLine(sampleTweet.Text);

var cleanedText = CleanTweetText(sampleTweet);
Console.WriteLine(cleanedText);

Szał Sylwestrowej Mody czyli Propozycja Nie Do Odrzucenia #AD2020 #NowyRok #2021 #Sylwester #Szampan #Party #Kwarantanna #COVID19 #Pidżama #Domówka https://t.co/F3GCmMpfms


Szał Sylwestrowej Mody czyli Propozycja Nie Do Odrzucenia   #2021        
