Permalink
Cannot retrieve contributors at this time
2221 lines (1707 sloc)
59.1 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Enable every plugin shipped with the tweepository package.
foreach my $pluginid (qw(
    Event::ArchiveTweetStreams
    Event::DeactivateTweetStreams
    Event::ExportTweetStreamPackage
    Event::LockingEvent
    Event::RecountTweetStreams
    Event::UpdateTweetStreamAbstracts
    Event::UpdateTweetStreams
    Event::WebObservatoryPush
    Export::TweetStream::CSV
    Export::TweetStream::GraphML
    Export::TweetStream::GoogleMap
    Export::TweetStream::HTML
    Export::TweetStream::JSON
    Screen::EPMC::tweepository
    Screen::ManageTweetstreamsLink
    Screen::RequestTweetStreamExport
    Screen::ExportArchivedPackage
    Screen::ExportUpdateLog
    Screen::GoogleMap
    Screen::TweetStreamSearch
))
{
    $c->{plugins}{$pluginid}{params}{disable} = 0;
}

# Position entries in the key_tools menu (lower number sorts first).
$c->{plugins}{"Workflow::View"}{appears}{key_tools} = 100;
$c->{plugins}{"Workflow::Edit"}{appears}{key_tools} = 200;
$c->{plugins}{"RequestTweetStreamExport"}{appears}{key_tools} = 300;
#turn off menus that aren't related to twitter harvesting
if ($c->{tweepository_simplify_menus})
{
    # DataSets keeps working but is dropped from the key_tools menu.
    $c->{plugins}{"Screen::DataSets"}{appears}{key_tools} = undef;

    # These screens are disabled outright.
    foreach my $screenid (qw(
        Screen::Items
        Screen::User::SavedSearches
        Screen::Review
        Screen::Staff::EPrintSearch
        Screen::Staff::IssueSearch
        Screen::Staff::HistorySearch
        Screen::Admin::RegenAbstracts
        Screen::Admin::RegenViews
        Screen::Subject::Edit
        Screen::MetaField::Listing
    ))
    {
        $c->{plugins}{$screenid}{params}{disable} = 1;
    }

    # Land users on the tweetstream management screen by default.
    $c->{plugins}{"Screen::FirstTool"}{params}{default} = "ManageTweetstreamsLink";
}
#Mutual-exclusion map for background event processes: each key names a
#process, and its value lists the processes that block it (it will not
#run while any of those are running).
$c->{block_map} = {
    #UpdateTweetStreams is the highest priority process, so it sits on top of everything.
    #RecountTweetStreams is run by hand if something goes wrong with the running counts that UpdateTweetStreams does
    'RecountTweetStreams' => [qw/ UpdateTweetStreams /],
    'UpdateTweetStreams' => [qw/ RecountTweetStreams /],
    #UpdateTweetStreamAbstracts is the nightly job, and it will give way to the above processes to avoid hitting the database and delaying UpdateTweetStreams
    'UpdateTweetStreamAbstracts' => [qw/ RecountTweetStreams UpdateTweetStreams /],
    #WebObservatoryPush uses very little resources, but should run after UpdateTweetstreamAbstracts, so is only blocked by that
    'WebObservatoryPush' => [qw/ UpdateTweetStreamAbstracts UpdateTweetStreams /],
    #User Requested Package -- blocked by processes above as they keep the repository running
    'ExportTweetStreamPackage' => [qw/ RecountTweetStreams UpdateTweetStreams UpdateTweetStreamAbstracts /],
    #Retiring Tweetstreams -- lowest priority
    'DeactivateTweetStreams' => [qw/ RecountTweetStreams UpdateTweetStreams UpdateTweetStreamAbstracts ExportTweetStreamPackage /],
    'ArchiveTweetStreams' => [qw/ RecountTweetStreams UpdateTweetStreams UpdateTweetStreamAbstracts ExportTweetStreamPackage DeactivateTweetStreams /]
};
#aggregation of metadata
#maps name of field in tweet ('from_user', 'hashtags', ...) to the name of
#the aggregate ("top N") field in tweetstream, plus that compound field's
#sub-field name ('subname')
#'tidy' controls whether the field will get tidied in the cache (throw away less frequent values to save space)
#'created_at' is flagged with the sentinel '##process_as_exception' rather
#than mapped to a tweetstream field
$c->{'update_tweetstream_abstracts'}->{fieldmap} = {
    'from_user' => { fieldname => 'top_from_users', subname => 'from_user', tidy => 1 },
    'created_at' => { fieldname => '##process_as_exception', tidy => 0 },
    'hashtags' => { fieldname => 'top_hashtags', subname => 'hashtag', tidy => 1},
    'tweetees' => { fieldname => 'top_tweetees', subname => 'tweetee', tidy => 1},
    'urls_from_text' => { fieldname => 'top_urls_from_text', subname => 'url_from_text', tidy => 1},
    'in_reply_to_status_id' => { fieldname => 'top_reply_tos', subname => 'reply_to', tidy => 1 },
    'retweeted_status_id' => { fieldname => 'top_retweeted', subname => 'retweeted', tidy => 1 },
};
#tweetstream latest_tool
#default mode: default citation, filtered to streams with at least one tweet
$c->{tweetstream_latest_tool_modes} = {
    default => {
        citation => "default",
        filters => [
            { meta_fields => [ "tweet_count" ], value => "1-" }
        ],
    }
};
#set up the datasets
#tweets are not indexed (index => 0); tweetstreams are (index => 1)
$c->{datasets}->{tweet} = {
    class => "EPrints::DataObj::Tweet",
    sqlname => "tweet",
    sql_counter => "tweetid",
    import => 1,
    index => 0,
};
$c->{datasets}->{tweetstream} = {
    class => "EPrints::DataObj::TweetStream",
    sqlname => "tweetstream",
    sql_counter => "tweetstreamid",
    import => 1,
    index => 1,
};
#lightweight dataobj for storing requests for export
$c->{datasets}->{tsexport} = {
    class => "EPrints::DataObj::TweetStreamExport",
    sqlname => "tsexport",
    sql_counter => "tsexportid",
    import => 1,
    index => 0,
};
#tsexport fields: which tweetstream to export, who requested it, and the
#lifecycle of the request (pending -> running -> finished)
$c->add_dataset_field( 'tsexport', { name=>"tsexportid", type=>"counter", required=>1, import=>0, can_clone=>1, sql_counter=>"tsexportid" }, );
$c->add_dataset_field( 'tsexport', { name=>"tweetstream", type=>"itemref", datasetid=> 'tweetstream', required => 1 }, );
$c->add_dataset_field( 'tsexport', { name=>"userid", type=>"itemref", datasetid=>"user", required=>1 }, );
$c->add_dataset_field( 'tsexport', { name=>"status", type=>"set", options => [qw( pending running finished )] }, );
#when the request was created...
$c->add_dataset_field( 'tsexport',
    {
        name=>"datestamp", type=>"time", required=>0, import=>0,
        render_res=>"minute", render_style=>"short", can_clone=>0
    } );
#...and when the export completed
$c->add_dataset_field( 'tsexport',
    {
        name=>"date_completed", type=>"time", required=>0, import=>0,
        render_res=>"minute", render_style=>"short", can_clone=>0
    } );
#base metadata
$c->add_dataset_field( 'tweet', { name=>"tweetid", type=>"counter", required=>1, import=>0, can_clone=>1, sql_counter=>"tweetid" }, );
#twitter's own id for this tweet
$c->add_dataset_field( 'tweet', { name=>"twitterid", type=>"bigint", required=>1 }, );
#$c->add_dataset_field( 'tweet', { name=>"datestamp", type=>"date" }, ); #stores the creation time of the object
$c->add_dataset_field( 'tweet', { name=>"json_source", type=>"storable", required=>1, render_value => 'EPrints::DataObj::Tweet::render_json_source' }, ); #full source kept for futureproofing
#extracted tweet metadata (populated by EPrints::DataObj::Tweet::process_json)
$c->add_dataset_field( 'tweet', { name=>"text", type=>"text" }, );
$c->add_dataset_field( 'tweet', { name=>"from_user", type=>"text", render_value => 'EPrints::DataObj::Tweet::render_from_user' }, );
$c->add_dataset_field( 'tweet', { name=>"from_user_id", type=>"bigint" }, );
$c->add_dataset_field( 'tweet', { name=>"in_reply_to_status_id", type=>"bigint" }, );
$c->add_dataset_field( 'tweet', { name=>"retweeted_status_id", type=>"bigint" }, );
$c->add_dataset_field( 'tweet', { name=>"profile_image_url", type=>"url", render_value => 'EPrints::DataObj::Tweet::render_profile_image_url' }, );
$c->add_dataset_field( 'tweet', { name=>"iso_language_code", type=>"text" }, );
$c->add_dataset_field( 'tweet', { name=>"source", type=>"text" }, );
$c->add_dataset_field( 'tweet', { name=>"created_at", type=>"time"}, );
#stored as [lat, lon] -- see process_json
$c->add_dataset_field( 'tweet', { name=>"coordinates", type=>"text", multiple=>1}, );
#value added extraction and enrichment (see EPrints::DataObj::Tweet::enrich_text)
$c->add_dataset_field( 'tweet', { name=>"text_is_enriched", type=>"boolean" }, );
$c->add_dataset_field( 'tweet', { name=>"text_enriched", type=>"longtext", render_value => 'EPrints::DataObj::Tweet::render_text_enriched' }, );
#@mentions and #hashtags extracted from the text on first commit
$c->add_dataset_field( 'tweet', { name=>"tweetees", type=>"text", multiple=>1 }, );
$c->add_dataset_field( 'tweet', { name=>"hashtags", type=>"text", multiple=>1 }, );
#store URLs from tweet
$c->add_dataset_field( 'tweet', { name=>"urls_from_text", type=>"url", multiple => 1 }, );
#store URL hops -- no longer used, but is valuable data. Should be reenabled later.
$c->add_dataset_field( 'tweet', { name=>"target_urls", type=>"url", multiple => 1 }, );
#store URL redirect pairs -- no longer used, but is valuable data. Should be reenabled later.
$c->add_dataset_field( 'tweet', {
    name=>"url_redirects",
    type=>"compound",
    multiple=>1,
    fields=>[
        {
            sub_name=>"url",
            type=>"url",
        },
        {
            sub_name=>"redirects_to",
            type=>"url",
        },
    ]
}, );
#a list of tweetstreams to which this tweet belongs
#volatile to speed up operations which add and remove tweets from tweetstreams
$c->add_dataset_field( 'tweet', { name=>"tweetstreams", type=>"itemref", datasetid=> 'tweetstream', required => 1, multiple => 1, volatile => 1 } );
#a flag to prevent enrichment being done more than once on commit
$c->add_dataset_field( 'tweet', { name=>"newborn", type=>"boolean"}, );
#system metadata
$c->add_dataset_field( 'tweetstream', { name=>"tweetstreamid", type=>"counter", required=>1, import=>0, can_clone=>1, sql_counter=>"tweetstreamid" }, );
#the user who owns this tweetstream
$c->add_dataset_field( 'tweetstream', { name=>"userid", type=>"itemref", datasetid=>"user", required=>1 }, );
$c->add_dataset_field( 'tweetstream', { name =>"status", type => 'set', options => [ 'active', 'inactive', 'archived' ] } );
#core metadata (set by user)
$c->add_dataset_field( 'tweetstream', { name=>"search_string", type=>"text", required=>"yes" }, );
$c->add_dataset_field( 'tweetstream', { name=>"geocode", type=>"text" }, );
$c->add_dataset_field( 'tweetstream', { name=>"expiry_date", type=>"date", required=>"yes" }, );
$c->add_dataset_field( 'tweetstream', { name=>"start_date", type=>"date" } );
$c->add_dataset_field( 'tweetstream', { name => "title", type=>'text' }, );
$c->add_dataset_field( 'tweetstream', { name => "abstract", type=>'longtext' }, );
$c->add_dataset_field( 'tweetstream', { name => "project_title", type=>'text' }, );
#state metadata (counts and stuff to avoid database queries)
$c->add_dataset_field( 'tweetstream', { name=>"tweet_count", type=>'bigint', volatile=>1} );
$c->add_dataset_field( 'tweetstream', { name=>"oldest_tweets", type=>"itemref", datasetid=>'tweet', multiple => 1, render_value => 'EPrints::DataObj::TweetStream::render_tweet_field' }, );
$c->add_dataset_field( 'tweetstream', { name=>"newest_tweets", type=>"itemref", datasetid=>'tweet', multiple => 1, render_value => 'EPrints::DataObj::TweetStream::render_tweet_field' }, );
#digest information: "top N" compound fields, each pairing a value with its
#occurrence count.  These correspond to the update_tweetstream_abstracts
#fieldmap entries above.  Store anything that appears more than once.
$c->add_dataset_field( 'tweetstream', {
    name => "top_hashtags", type=>"compound", multiple=>1,
    'fields' => [
        {
            'sub_name' => 'hashtag',
            'type' => 'text',
        },
        {
            'sub_name' => 'count',
            'type' => 'int',
        }],
    render_value => 'EPrints::DataObj::TweetStream::render_top_tag_cloud',
},);
#most prolific tweeters, with an avatar URL so they can be rendered with thumbnails
$c->add_dataset_field('tweetstream', {
    name => "top_from_users", type=>"compound", multiple=>1,
    'fields' => [
        {
            'sub_name' => 'from_user',
            'type' => 'text',
        },
        {
            'sub_name' => 'profile_image_url',
            'type' => 'url',
        },
        {
            'sub_name' => 'count',
            'type' => 'int',
        }],
    render_value => 'EPrints::DataObj::TweetStream::render_top_field',
},);
#most @mentioned users
$c->add_dataset_field('tweetstream', { name => "top_tweetees", type=>"compound", multiple=>1,
    'fields' => [
        {
            'sub_name' => 'tweetee',
            'type' => 'text',
        },
        {
            'sub_name' => 'count',
            'type' => 'int',
        }
    ],
    render_value => 'EPrints::DataObj::TweetStream::render_top_tag_cloud',
},);
#most frequently tweeted URLs
$c->add_dataset_field('tweetstream', { name => "top_urls_from_text", type=>"compound", multiple=>1,
    'fields' => [
        {
            'sub_name' => 'url_from_text',
            'type' => 'url',
        },
        {
            'sub_name' => 'count',
            'type' => 'int',
        }
    ],
    render_value => 'EPrints::DataObj::TweetStream::render_top_field',
},);
#most replied-to tweet ids
$c->add_dataset_field('tweetstream', { name => "top_reply_tos", type=>"compound", multiple=>1,
    'fields' => [
        {
            'sub_name' => 'reply_to',
            'type' => 'bigint',
        },
        {
            'sub_name' => 'count',
            'type' => 'int',
        }
    ],
    render_value => 'EPrints::DataObj::TweetStream::render_top_field',
},);
#most retweeted tweet ids
$c->add_dataset_field('tweetstream', { name => "top_retweeted", type=>"compound", multiple=>1,
    'fields' => [
        {
            'sub_name' => 'retweeted',
            'type' => 'bigint',
        },
        {
            'sub_name' => 'count',
            'type' => 'int',
        }
    ],
    render_value => 'EPrints::DataObj::TweetStream::render_top_field',
},);
#recent geolocations (lon/lat stored as text)
$c->add_dataset_field('tweetstream', { name => 'newest_coordinates', type => 'compound', multiple => 1,
    'fields' => [
        {
            'sub_name' => 'lon',
            'type' => 'text',
        },
        {
            'sub_name' => 'lat',
            'type' => 'text',
        }
    ],
    render_value => 'EPrints::DataObj::TweetStream::render_newest_coordinates',
});
#for creation of the bar chart: bucket size plus label/value pairs
$c->add_dataset_field( 'tweetstream', { name => "frequency_period", type => 'set', options => [ 'daily', 'weekly', 'monthly', 'yearly' ] }, );
$c->add_dataset_field( 'tweetstream', { name => "frequency_values", type => 'compound', multiple=>1,
    'fields' => [
        {
            'sub_name' => 'label',
            'type' => 'text',
        },
        {
            'sub_name' => 'value',
            'type' => 'int',
        }
    ],
    render_value => 'EPrints::DataObj::TweetStream::render_top_frequency_values',
},);
#for generating CSV, these store the highest count of each of the multiple fields
$c->add_dataset_field( 'tweetstream', { name => "hashtags_ncols", type=>'int', volatile => '1' }, );
$c->add_dataset_field( 'tweetstream', { name => "tweetees_ncols", type=>'int', volatile => '1' }, );
$c->add_dataset_field( 'tweetstream', { name => "urls_from_text_ncols", type=>'int', volatile => '1' }, );
#virtual fields used to render things on the abstract page (no stored data,
#just render callbacks)
$c->add_dataset_field( 'tweetstream', { name=>"rendered_tweetlist", virtual=> 1, type=>"int", render_value => 'EPrints::DataObj::TweetStream::render_tweet_list' }, );
$c->add_dataset_field( 'tweetstream', { name => "tools", type=>'boolean', virtual => '1', render_value => 'EPrints::DataObj::TweetStream::render_tools' }, );
$c->add_dataset_field( 'tweetstream', { name => "exports", type=>'boolean', virtual => '1', render_value => 'EPrints::DataObj::TweetStream::render_exports' }, );
#fields for exporting to a Web Observatory (validated by the
#EP_TRIGGER_VALIDATE_FIELD trigger below)
$c->add_dataset_field('tweetstream', { name => 'web_observatory_export', type => 'set', options => [ 'yes', 'no' ]});
$c->add_dataset_field('tweetstream', { name => 'web_observatory_id', type => 'text'});
$c->add_dataset_field('tweetstream', { name => 'web_observatory_collection', type => 'text'});
# Field-validation trigger: when the 'web_observatory_export' field is
# validated, ask the dataobj to check its web observatory metadata and
# report any problem as a validation message.
$c->add_trigger(EP_TRIGGER_VALIDATE_FIELD, sub
{
    my %args = @_;

    # Only the web_observatory_export field is of interest.
    return EP_TRIGGER_DONE unless $args{field}->name eq 'web_observatory_export';

    my $dataobj = $args{dataobj};
    my $wo_status = $dataobj->validate_web_observatory_meta;
    unless ($wo_status->{valid})
    {
        push @{ $args{problems} },
            $dataobj->repository->xml->create_text_node(
                "Web Observatory Export Issue: " . $wo_status->{msg});
    }
    return EP_TRIGGER_DONE;
});
| { | |
| package EPrints::DataObj::Tweet; | |
| our @ISA = ( 'EPrints::DataObj' ); | |
| use EPrints; | |
| use EPrints::Search; | |
| use JSON; | |
| use Date::Parse; | |
| use URI::Find; | |
| use HTML::Entities; | |
| use strict; | |
######################################################################
=pod
=item $tweet = EPrints::DataObj::Tweet->new( $session, $tweetid )
Fetch the tweet with ID $tweetid from the database and return it as
an EPrints::DataObj::Tweet object.
=cut
######################################################################
sub new
{
    my( $class, $session, $tweetid ) = @_;

    my $dataset = $session->get_repository->get_dataset( "tweet" );
    return $session->get_database->get_single( $dataset, $tweetid );
}
######################################################################
=pod
=item $tweet = EPrints::DataObj::Tweet->new_from_data( $session, $data )
Construct a new EPrints::DataObj::Tweet object from the $data hash
reference of metadata, as retrieved from the database.
=cut
######################################################################
sub new_from_data
{
    my( $class, $session, $known ) = @_;

    my $dataset = $session->get_repository->get_dataset( "tweet" );
    return $class->SUPER::new_from_data( $session, $known, $dataset );
}
######################################################################
# =pod
#
# =item $dataobj = EPrints::DataObj->create_from_data( $session, $data, $dataset )
#
# Create a new object of this type in the database.
#
# $dataset is the dataset it will belong to.
#
# $data is the data structured as with new_from_data.
#
# =cut
######################################################################
sub create_from_data
{
    my( $class, $session, $data, $dataset ) = @_;
    my $new_tweet = $class->SUPER::create_from_data( $session, $data, $dataset );
    #$new_tweet->set_value( "datestamp", EPrints::Time::get_iso_timestamp() );
    #run triggers immediately; if they changed anything, write those
    #changes straight back to the database
    $new_tweet->update_triggers();
    if( scalar( keys %{$new_tweet->{changed}} ) > 0 )
    {
        # Remove empty slots in multiple fields
        $new_tweet->tidy;
        # Write the data to the database
        $session->get_database->update(
            $new_tweet->{dataset},
            $new_tweet->{data},
            $new_tweet->{changed} );
    }
    #make sure the tweetid counter never falls behind this object's id
    $session->get_database->counter_minimum( "tweetid", $new_tweet->get_id );
    return $new_tweet;
}
######################################################################
=pod
=item $dataset_id = EPrints::DataObj::Tweet->get_dataset_id
Return the id of the L<EPrints::DataSet> this class's records belong
to; always "tweet".
=cut
######################################################################
sub get_dataset_id { return "tweet" }
| ###################################################################### | |
| =pod | |
| =item $defaults = EPrints::DataObj::Tweet->get_defaults( $session, $data ) | |
| Return default values for this object based on the starting data. | |
| =cut | |
| ###################################################################### | |
| # inherits | |
######################################################################
=pod
=item $tweet = EPrints::DataObj::Tweet::tweet_with_twitterid( $repo, $twitterid )
Search the tweet dataset for the tweet whose 'twitterid' exactly
matches $twitterid and return it, or undef when there is no match.
=cut
######################################################################
sub tweet_with_twitterid
{
    my( $repo, $twitterid ) = @_;

    my $filter = {
        meta_fields => [ 'twitterid' ],
        value       => $twitterid,
        match       => 'EX',
    };
    my $matches = $repo->dataset( "tweet" )->search( filters => [ $filter ] );
    return $matches->item( 0 );
}
######################################################################
=pod
=item $tweet->commit( [$force] )
Write this object to the database.
If $force isn't true then it only actually modifies the database
if one or more fields have been changed.
On a tweet's first commit (tracked via the 'newborn' flag) the derived
fields are populated from json_source and the text is enriched.
=cut
######################################################################
#NOTE(review): the original POD also claimed "If this item is not in any
#tweetstreams, then remove it" -- no such removal happens here; see
#remove_from_tweetstream for that behaviour.
sub commit
{
    my( $self, $force ) = @_;
    my $repo = $self->repository;
    #first commit of a freshly created tweet: mark it so the derived
    #fields below are only computed once
    $self->set_value('newborn', 'TRUE') if !$self->is_set('newborn');
    $self->update_triggers();
    if ($self->get_value('newborn') eq 'TRUE')
    {
        if ($self->is_set('json_source')) #should always be true, but just in case....
        {
            $self->process_json;
        }
        #derive @mentions and #hashtags from the tweet text
        $self->set_value('tweetees', $self->tweetees);
        $self->set_value('hashtags', $self->hashtags);
        $self->set_value('newborn', 'FALSE');
        $self->set_value('text_is_enriched', 'FALSE');
        $self->enrich_text;
    }
    if( !defined $self->{changed} || scalar( keys %{$self->{changed}} ) == 0 )
    {
        # don't do anything if there isn't anything to do
        return( 1 ) unless $force;
    }
    my $success = $self->SUPER::commit( $force );
    return( $success );
}
######################################################################
=pod
=item $success = $tweet->remove
Remove this tweet's record from the database.  Returns the database
layer's success value.
=cut
######################################################################
sub remove
{
    my( $self ) = @_;

    my $session = $self->{session};
    my $dataset = $session->get_repository->get_dataset( "tweet" );

    # remove tweet record
    my $removed = $session->get_database->remove(
        $dataset,
        $self->get_value( "tweetid" ) );

    return( $removed );
}
#Takes an array ref and a (scalar or array ref) and returns an array ref
#containing each value exactly once.  Values are compared as strings.
#Unlike the original implementation (which returned 'keys' of a hash, i.e.
#in nondeterministic order), the result preserves first-seen order, making
#repeated runs deterministic.
sub dedup_add
{
    my ($self, $arr_ref, $val) = @_;

    #accept a bare scalar as well as an array ref for the added value(s)
    $val = [$val] if not ref $val;

    my %seen;
    my @deduped;
    foreach my $item (@{$arr_ref}, @{$val})
    {
        push @deduped, $item unless $seen{$item}++;
    }
    return \@deduped;
}
#Remove this tweet from the passed tweetstream.  If it still belongs to
#other tweetstreams, update its membership list and commit; otherwise the
#tweet belongs to nothing and is deleted entirely.
sub remove_from_tweetstream
{
    my ($self, $tweetstream) = @_;

    my $removed_id = $tweetstream->id;
    my @remaining = grep { $_ != $removed_id } @{ $self->get_value('tweetstreams') };

    if (scalar @remaining)
    {
        $self->set_value('tweetstreams', \@remaining);
        $self->commit(1);
    }
    else
    {
        $self->remove;
    }
}
######################################################################
=pod
=item $success = $tweet->process_json
Extract tweet metadata from the source json and copy it into the
object's metafields.  Returns 0 if json_source is not set, 1 otherwise.
=cut
######################################################################
sub process_json
{
    my ( $self ) = @_;
    return 0 unless $self->is_set('json_source');
    my $tweet_data = $self->get_value('json_source');
    #pull the data out and stick it in metafields
    foreach my $fieldname (qw/ text from_user from_user_id profile_image_url iso_language_code source in_reply_to_status_id /)
    {
        if ($tweet_data->{$fieldname})
        {
            $self->set_value($fieldname, $tweet_data->{$fieldname});
        }
    }
    #geolocation, if present
    #NOTE(review): the source pairs index 0 with $lon and index 1 with $lat,
    #but the field is stored as [$lat, $lon] -- presumably deliberate;
    #confirm against consumers of 'coordinates' before changing.
    if (
        $tweet_data->{coordinates}
        && $tweet_data->{coordinates}->{coordinates}
    )
    {
        my $lon = $tweet_data->{coordinates}->{coordinates}->[0];
        my $lat = $tweet_data->{coordinates}->{coordinates}->[1];
        $self->set_value('coordinates', [$lat,$lon]);
    }
    if (exists $tweet_data->{retweeted_status})
    {
        my $retweeted_status_id = $tweet_data->{retweeted_status}->{id};
        $self->set_value('retweeted_status_id', $retweeted_status_id) if $retweeted_status_id;
    }
    #API v1.1 hacks -- v1.1 nests some values under 'metadata' and 'user';
    #these overwrite the top-level values copied above when present
    if ($tweet_data->{metadata}->{iso_language_code})
    {
        $self->set_value('iso_language_code', $tweet_data->{metadata}->{iso_language_code});
    }
    if ($tweet_data->{user}->{screen_name})
    {
        $self->set_value('from_user', $tweet_data->{user}->{screen_name});
        $self->set_value('from_user_id', $tweet_data->{user}->{id});
        $self->set_value('profile_image_url', $tweet_data->{user}->{profile_image_url});
    }
    #convert created_at to an eprints timestamp
    #NOTE(review): str2time returns undef for unparseable dates; no guard here.
    my $time = str2time($tweet_data->{created_at});
    $self->set_value('created_at',EPrints::Time::get_iso_timestamp($time));
    return 1;
}
#Return an array ref of the @mentions appearing in the tweet text
#(each including its leading '@'); empty array ref when there is no text.
sub tweetees
{
    my ($self) = @_;

    my $text = $self->get_value('text');
    return [] unless $text;

    return [ $text =~ m/\@[A-Za-z0-9-_]+/g ];
}
#Return an array ref of the #hashtags appearing in the tweet text
#(each including its leading '#'); empty array ref when there is no text.
sub hashtags
{
    my ($self) = @_;

    my $text = $self->get_value('text');
    return [] unless $text;

    return [ $text =~ m/#[A-Za-z0-9-_]+/g ];
}
#Build the HTML 'text_enriched' version of the tweet text: expand t.co
#style links using the entity metadata twitter supplies (when present),
#then wrap URLs, @usernames and #hashtags in anchor tags.  Also populates
#'urls_from_text' and sets 'text_is_enriched' to TRUE.
sub enrich_text
{
    my ($self) = @_;
    my $message = $self->get_value('text');
    return unless $message;
    #map of short url -> expanded url, from twitter's entity metadata
    my $url_map = {};
    my $expanded_urls = [];
    my $tweet_data = $self->get_value('json_source');
    if ($tweet_data->{entities})
    {
        foreach my $url_data (@{$tweet_data->{entities}->{urls}})
        {
            $url_map->{$url_data->{url}} = $url_data->{expanded_url};
            push @{$expanded_urls}, $url_data->{expanded_url};
        }
    }
    my $expanded_message = $message;
    my @URLS;
    #rewrite each URL in the text as an anchor: href is the expanded form
    #when known, the link text stays the original URL
    my $finder = URI::Find->new(sub{
        my($uri, $orig_uri) = @_;
        my $url_to_use = $orig_uri;
        if ($url_map->{$orig_uri})
        {
            $url_to_use = $url_map->{$orig_uri};
        }
        push @URLS, $url_to_use;
        #escape HASH and AT symbols in the urls so that regexp for user and hashtag insertion don't change them
        $orig_uri =~ s/#/ESCAPED_HASH/g;
        $orig_uri =~ s/\@/ESCAPED_AT/g;
        return '<a href="'.$url_to_use.'">'.$orig_uri.'</a>';
    });
    $finder->find(\$expanded_message);
    #now insert links to hashtags and usernames - how do we stop this from modifying text inside a long URL
    $expanded_message =~ s|\@([A-Za-z0-9-_]+)|<a href="http://twitter.com/$1">$&</a>|g;
    $expanded_message =~ s|#([A-Za-z0-9-_]+)|<a href="http://twitter.com/hashtag/$1">$&</a>|g;
    #now unescape HASH and AT (must happen after the two substitutions above)
    $expanded_message =~ s/ESCAPED_HASH/#/g;
    $expanded_message =~ s/ESCAPED_AT/\@/g;
    #figure out if we have urls from entities (backwards compatibility to when twitter didn't supply entities)
    if (scalar @{$expanded_urls})
    {
        $self->set_value('urls_from_text', $self->remove_bad_urls($expanded_urls));
    }
    else
    {
        $self->set_value('urls_from_text', $self->remove_bad_urls(\@URLS));
    }
    $self->set_value('text_enriched', "$expanded_message"); #should have all the links expanded out now.
    $self->set_value('text_is_enriched', 'TRUE');
}
#A number of URLs are detected that are not useful (truncated schemes and
#bare t.co stubs).  Remove these.
#Fix over the original: always returns an array ref -- the original left
#$good_urls undef when every input URL was filtered out.
sub remove_bad_urls
{
    my ($self, $urls) = @_;

    my %bad_urls = map { $_ => 1 } (
        'http://', 'http:/', 'http:',
        'http://t.c', 'http://t',
        'http://t.co',   #technically valid, but almost always a mistake
        'https://', 'https:/', 'https:',
        'https://t.c', 'https://t',
        'https://t.co',  #technically valid, but almost always a mistake
    );

    my $good_urls = [];
    foreach my $url (@{$urls})
    {
        next if $bad_urls{$url};
        push @{$good_urls}, $url;
    }
    return $good_urls;
}
#Render this tweet as an <li class="tweet" id="tweet-TWITTERID"> element
#wrapping the span produced by render_span.
sub render_li
{
    my ($self) = @_;

    my $xml = $self->{session}->xml;
    my $item = $xml->create_element(
        'li',
        class => 'tweet',
        id    => 'tweet-' . $self->get_value('twitterid'),
    );
    $item->appendChild($self->render_span);
    return $item;
}
#Render the tweet body as a span: named anchor, profile image, author
#link, enriched text, and a metadata span (creation time + Twitter ID).
sub render_span
{
    my ( $self ) = @_;
    my $xml = $self->{session}->xml;
    my $twitterid = $self->get_value('twitterid');
    my $span = $xml->create_element('span', class=>'tweet-body');
    #named anchor so individual tweets can be addressed by twitterid
    my $anchor = $xml->create_element('a', name => $twitterid);
    $span->appendChild($anchor);
    $span->appendChild($self->render_value('profile_image_url'));
    my $text_part = $xml->create_element('span', class=>'tweet-text-part');
    $span->appendChild($text_part);
    $text_part->appendChild($self->render_value('from_user'));
    $text_part->appendChild($xml->create_text_node(' '));
    #(the original built an unused 'span.text' element here; removed as dead code)
    $text_part->appendChild($self->render_value('text_enriched'));
    $text_part->appendChild($xml->create_text_node(' '));
    my $meta_span = $xml->create_element('span', class=>'meta');
    $meta_span->appendChild($self->render_value('created_at'));
    $meta_span->appendChild($xml->create_element('br'));
    $meta_span->appendChild($xml->create_text_node('Tweet ID: ' . $self->get_value('twitterid')));
    $text_part->appendChild($meta_span);
    return $span;
}
#Render callback for the 'json_source' field: pretty-print the stored
#structure as JSON and hand it to EPrints' preformatted-field renderer.
sub render_json_source
{
    my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;

    my $pretty_json = JSON->new->allow_nonref->pretty->encode($value);
    return EPrints::Extras::render_preformatted_field(
        $session, $field, $pretty_json, $alllangs, $nolink, $object);
}
#Render callback for 'profile_image_url': a 48x48 avatar thumbnail that
#links to the author's twitter page, falling back to a placeholder egg
#image when the avatar URL no longer resolves.
sub render_profile_image_url
{
    my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;

    my $xml = $session->xml;
    my $link = $xml->create_element('a',
        href => 'http://twitter.com/' . $object->get_value('from_user'));
    my $img = $xml->create_element('img',
        height  => "48",
        width   => "48",
        class   => 'author-thumb',
        src     => $value,
        onerror => "this.onerror=''; this.src='/images/tweepository/egg.png';");
    $link->appendChild($img);

    my $wrapper = $xml->create_element('span', class=>'author-thumb');
    $wrapper->appendChild($link);
    return $wrapper;
}
#Render callback for 'from_user': the username as a link to the user's
#twitter page.
sub render_from_user
{
    my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;

    my $xml = $session->xml;
    my $link = $xml->create_element('a', href => 'http://twitter.com/' . $value);
    $link->appendChild($xml->create_text_node($value));
    return $link;
}
#Render callback for 'text_enriched': parse the stored HTML fragment and
#graft its nodes into a span.  Falls back to the plain 'text' value when
#no enriched text exists or when the fragment fails to parse.
sub render_text_enriched
{
    my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
    return $object->render_value('text') unless $value; #enrich_text may not have been called
    my $xml = $session->xml;
    my $text_span = $xml->create_element('span', class=>'text', id=>'tweet-'.$object->get_value('twitterid'));
    #I'm not sure I'm doing this right, but I've found a way that works. What's the EPrints way of doing this?
    my $doc = eval { EPrints::XML::parse_xml_string( "<fragment>".$value."</fragment>" ); };
    # my $doc = eval { EPrints::XML::parse_xml_string( "<fragment>".decode_entities($value)."</fragment>" ); };
    if( $@ )
    {
        $session->get_repository->log( "Error rendering text_enriched on tweet " . $object->get_id . " for text:\n\t$value\nError:\n\t$@" );
        return $object->render_value('text'); #fall back to the simple text value
    }
    else
    {
        #clone the fragment's children into a node owned by this session
        my $top = ($doc->getElementsByTagName( "fragment" ))[0];
        foreach my $node ( $top->getChildNodes )
        {
            $text_span->appendChild(
                $session->clone_for_me( $node, 1 ) );
        }
        EPrints::XML::dispose( $doc );
    }
    return $text_span;
}
sub data_for_export
{
	# Build the data structure exported for this tweet: the original JSON
	# from Twitter if we have it, plus any locally-derived fields grouped
	# under the 'eprints_value_added' key.
	my ($self) = @_;
	my $data;
	if ($self->is_set('json_source'))
	{
		$data = $self->value('json_source');
	}
	else #should never happen, but fall back to a minimal record built from individual fields
	{
		foreach my $fieldname (qw/ from_user text created_at /)
		{
			$data->{$fieldname} = $self->value($fieldname) if $self->is_set($fieldname);
		}
	}
	foreach my $fieldname (qw/ text_enriched urls_from_text /)
	{
		$data->{eprints_value_added}->{$fieldname} = $self->value($fieldname) if ($self->is_set($fieldname));
	}
	return $data;
}
| 1; | |
| } | |
| { | |
| package EPrints::DataObj::TweetStream; | |
| our @ISA = ( 'EPrints::DataObj' ); | |
| use EPrints; | |
| use EPrints::Search; | |
| use Date::Calc qw/ Week_of_Year Delta_Days Add_Delta_Days /; | |
| use File::Path qw/ make_path /; | |
| use strict; | |
#add an arrayref of tweet objects to this tweetstream
#note that this function commits the object
sub add_tweets
{
	my ($self, $tweets) = @_;
	return unless scalar @{$tweets}; #paranoia -- make sure there are tweets in the array
	my $repo = $self->repository;
	my $tweet_ds = $repo->dataset('tweet');
	#we may need to update the highest and lowest. Initialise variables to assist with this.
	#highest_and_lowest caches the boundary twitterids currently referenced by the
	#newest_tweets/oldest_tweets cache fields; refresh_needed records whether each of
	#those fields must be regenerated after this batch has been added.
	my $refresh_needed = {};
	my $highest_and_lowest = {};
	foreach my $fieldname(qw/ newest_tweets oldest_tweets /)
	{
		$refresh_needed->{$fieldname} = 0;
		$highest_and_lowest->{$fieldname} = 0;
		if ($self->is_set($fieldname))
		{
			my $tweetids = $self->value($fieldname);
			my $index = 0;
			#both cache lists are stored in ascending twitterid order, so the
			#newest boundary is the last element and the oldest is the first
			$index = -1 if $fieldname eq 'newest_tweets';
			my $tweet = $tweet_ds->dataobj($tweetids->[$index]);
			$highest_and_lowest->{$fieldname} = $tweet->value('twitterid') if $tweet;
		}
		$refresh_needed->{$fieldname} = 1 unless $highest_and_lowest->{$fieldname};
	}
	#keep a rolling count of the number of tweets -- initialise the count
	my $tweet_count;
	if (!$self->is_set('tweet_count'))
	{
		$tweet_count = $self->count_with_query;
	}
	else
	{
		$tweet_count = $self->value('tweet_count');
	}
	#now add each tweet
	foreach my $tweet (@{$tweets})
	{
		if ($self->_add_tweet($tweet)) #false if the tweet was already a member
		{
			$tweet_count++;
			#do we need to update oldest or newest?
			$refresh_needed->{newest_tweets} = 1 if ($tweet->value('twitterid') > $highest_and_lowest->{newest_tweets});
			$refresh_needed->{oldest_tweets} = 1 if ($tweet->value('twitterid') < $highest_and_lowest->{oldest_tweets});
			#collect the most recent N coordinates
			if ($tweet->is_set('coordinates'))
			{
				my $newest_coords = $self->value('newest_coordinates');
				$newest_coords = [] unless defined $newest_coords;
				my $coords = $tweet->value('coordinates');
				push @{$newest_coords}, { lat => $coords->[0], lon => $coords->[1] };
				my $n = $repo->config('tweepository_newest_coordinates_n');
				$n = 100 unless $n; #default when config variable isn't set
				if ( (scalar @{$newest_coords}) > $n)
				{
					#keep only the most recent $n coordinate pairs
					my @c = @{$newest_coords}[-$n..-1];
					$newest_coords = \@c;
				}
				$self->set_value('newest_coordinates', $newest_coords);
			}
			#update ncols fields if necessary -- these record the largest number of
			#values any single tweet in this stream has for each multiple field
			#(used to size the CSV export columns, see csv_cols)
			foreach my $tweet_fieldname (qw/ hashtags tweetees urls_from_text /)
			{
				if ($tweet->is_set($tweet_fieldname))
				{
					my $val = $tweet->value($tweet_fieldname);
					my $n = scalar @{$val};
					my $tweetstream_fieldname = $tweet_fieldname . '_ncols';
					if ($self->is_set($tweetstream_fieldname))
					{
						my $ts_n = $self->value($tweetstream_fieldname);
						if ($n > $ts_n)
						{
							$self->set_value($tweetstream_fieldname, $n);
						}
					}
					else
					{
						$self->set_value($tweetstream_fieldname, $n);
					}
				}
			}
		}
	}
	#update oldest and newest tweets if needed
	foreach my $fieldname (qw/ newest_tweets oldest_tweets /)
	{
		if ($refresh_needed->{$fieldname})
		{
			my $val = $self->_generate_oldest_or_youngest_tweets($fieldname, $tweets);
			$self->set_value($fieldname, $val);
		}
	}
	$self->set_value('tweet_count', $tweet_count);
	$self->commit;
}
sub _merge_tweets_with_tweet_field
{
	# Return a new arrayref holding the supplied tweet objects followed by
	# the (still-existing) tweet objects whose IDs are stored in the
	# multiple field $fieldid on this tweetstream.
	my ($self, $obj_array, $fieldid) = @_;
	my $tweet_ds = $self->repository->dataset('tweet');
	my @merged = @{$obj_array};
	if ($self->is_set($fieldid))
	{
		foreach my $objid (@{$self->value($fieldid)})
		{
			my $stored_tweet = $tweet_ds->dataobj($objid);
			push @merged, $stored_tweet if $stored_tweet; #skip IDs of deleted tweets
		}
	}
	return \@merged;
}
sub _generate_oldest_or_youngest_tweets
{
	# Recompute the 'oldest_tweets' or 'newest_tweets' cache field.
	# Merges the newly added tweets ($new_tweets) with the tweets already
	# referenced by $fieldname, sorts by twitter ID, and keeps the N
	# lowest (oldest) or N highest (newest), where N comes from the
	# tweetstream_tweet_renderopts config.  Returns an arrayref of tweet
	# (eprints) IDs in ascending twitterid order.
	my ($self, $fieldname, $new_tweets) = @_;
	my $repo = $self->repository;
	my $all_tweets = $self->_merge_tweets_with_tweet_field($new_tweets, $fieldname);
	my $n = 100; #fallback if config is missing
	$n = $repo->config('tweetstream_tweet_renderopts','n_oldest') if ($fieldname eq 'oldest_tweets');
	$n = $repo->config('tweetstream_tweet_renderopts','n_newest') if ($fieldname eq 'newest_tweets');
	my @sorted_tweets = sort {$a->value('twitterid') <=> $b->value('twitterid')} @{$all_tweets};
	#don't slice beyond the ends of the array (out-of-range slots come back undef)
	$n = scalar @sorted_tweets if $n > scalar @sorted_tweets;
	if ($fieldname eq 'newest_tweets')
	{
		#BUGFIX: slice was previously (1-$n)..-1, which kept only N-1 tweets
		@sorted_tweets = @sorted_tweets[-$n..-1]; #get highest IDs (RHS of the array);
	}
	else
	{
		@sorted_tweets = @sorted_tweets[0..($n-1)]; #get lowest IDs (LHS of the array);
	}
	my $tweetids = [];
	foreach my $tweet (@sorted_tweets)
	{
		next unless $tweet; #safety check
		push @{$tweetids}, $tweet->value('tweetid');
	}
	return $tweetids;
}
#not intended to be called, use add_tweets instead
sub _add_tweet
{
	# Link $tweet to this tweetstream by appending our ID to the tweet's
	# 'tweetstreams' multiple field.  Returns 0 if the tweet was already
	# a member (no change made), 1 if it was added.
	my ($self, $tweet) = @_;
	my $own_id = $self->value('tweetstreamid');
	my $tsids = $tweet->value('tweetstreams');
	#don't add if it's already in this tweetstream
	return 0 if grep { $_ == $own_id } @{$tsids};
	push @{$tsids}, $own_id;
	$tweet->set_value('tweetstreams', $tsids);
	$tweet->commit(1); #force commit -- tweetstreams is a volatile value
	return 1;
}
sub pending_package_request
{
	# The most recent export request for this tweetstream that is still
	# waiting to run, or undef if there isn't one.
	my ($self) = @_;
	return $self->_package_request('pending');
}
sub running_package_request
{
	# The most recent export request for this tweetstream that is
	# currently running, or undef if there isn't one.
	my ($self) = @_;
	return $self->_package_request('running');
}
sub _package_request
{
	# Return the newest 'tsexport' dataobj for this tweetstream with the
	# given status (e.g. 'pending' or 'running'), or undef if none match.
	my ($self, $status) = @_;
	my $repo = $self->repository;
	my $ds = $repo->dataset('tsexport');
	my %options;
	$options{filters} = [{
		meta_fields => [qw( tweetstream )],
		value => $self->id,
	},
	{
		meta_fields => [qw( status )],
		value => $status,
	},
	];
	$options{custom_order} = '-tsexportid'; #newest first
	my $list = $ds->search(%options);
	if ($list->count >= 1)
	{
		return $list->item(0);
	}
	return undef;
}
sub delete_export_package
{
	# Remove this tweetstream's export package file from disk, if one
	# exists.
	my ($self) = @_;
	my $package_path = $self->export_package_filepath;
	unlink $package_path if -e $package_path;
}
sub export_package_filename
{
	# Filename (without directory) of this tweetstream's export package.
	# An already-existing package keeps its current extension; a new one
	# gets .zip below the configured tweet-count threshold and .tar.gz
	# above it.
	my ($self) = @_;
	my $target_dir = $self->export_package_directory;
	my $filename = 'tweetstream' . $self->id . '_package';
	my $zip_filename = $filename . '.zip';
	my $tar_filename = $filename . '.tar.gz';
	#this code handles the switch from .zip to .tar.gz -- only return .zip for existing packages
	my $zip_path = $target_dir . $zip_filename;
	my $tar_path = $target_dir . $tar_filename;
	#if there's an existing file, return that (there should only ever be one)
	return $zip_filename if -e $zip_path;
	return $tar_filename if -e $tar_path;
	#there isn't an existing one -- we should use zip files for small and tar files for big tweetstreams
	#the tweet count is a rolling total and gets incremented every time a tweet is added, so this should be safe
	if ($self->value('tweet_count') > $self->repository->config('tweepository_archive_tar_threshold'))
	{
		return $tar_filename;
	}
	return $zip_filename;
}
sub export_package_directory
{
	# Directory into which export packages and update logs are written.
	# Uses the 'tweetstream_export_package_directory' config setting if
	# present, otherwise a default under archiveroot (created on demand).
	# NOTE(review): callers concatenate filenames directly onto this
	# value, so a configured directory must end with a trailing slash --
	# the default path does.
	my ($self) = @_;
	if ($self->repository->config('tweetstream_export_package_directory'))
	{
		return $self->repository->config('tweetstream_export_package_directory');
	}
	my $target_dir = $self->repository->config('archiveroot') . '/tweepository_packages/export/';
	make_path($target_dir) unless -d $target_dir;
	return $target_dir;
}
sub update_log_filepath
{
	# Full path of the CSV file in which harvest updates for this
	# tweetstream are recorded (see log_update).
	# (Removed two dead lines: an unused $repo, and `my $repository = @_;`
	# which assigned the argument count to an unused scalar.)
	my ($self) = @_;
	my $target_dir = $self->export_package_directory;
	my $filename = 'tweetstream' . $self->id . '_updates.csv';
	return "$target_dir$filename";
}
sub log_update
{
	# Append one row to this tweetstream's update-log CSV, creating the
	# file (with a heading row) on first use.
	# Arguments: harvest start time, end time, number of tweets added and
	# the end state of the run.  Row formatting is delegated to the CSV
	# export plugin's csv() helper.
	# (Previously used unchecked 2-arg bareword opens and never closed
	# the append handle; now uses checked 3-arg lexical opens.)
	my ($self, $start_time, $end_time, $count, $status) = @_;
	my $file = $self->update_log_filepath;
	if (!-e $file)
	{
		if (open my $fh, '>', $file)
		{
			print {$fh} EPrints::Plugin::Export::TweetStream::CSV::csv("Start Time", "End Time", "Number of Tweets Added", "End State"); #headings
			close $fh;
		}
		else
		{
			$self->repository->log("Unable to create update log $file: $!");
			return;
		}
	}
	if (open my $fh, '>>', $file)
	{
		print {$fh} EPrints::Plugin::Export::TweetStream::CSV::csv($start_time, $end_time, $count, $status);
		close $fh or $self->repository->log("Error closing update log $file: $!");
	}
	else
	{
		$self->repository->log("Unable to append to update log $file: $!");
	}
}
sub export_package_filepath
{
	# Full path of this tweetstream's export package (zip or tar.gz --
	# see export_package_filename).
	# (Removed two dead lines: an unused $repo, and `my $repository = @_;`
	# which assigned the argument count to an unused scalar.)
	my ($self) = @_;
	my $target_dir = $self->export_package_directory;
	my $filename = $self->export_package_filename;
	return "$target_dir$filename";
}
sub render_exports
{
	# Field render callback wrapper: delegate to the object's
	# render_exporters method.
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	return $object->render_exporters;
}
sub validate_web_observatory_meta
{
	# Validate this tweetstream's web observatory export settings.
	# Returns { valid => 1 } when export is disabled or all checks pass,
	# otherwise { valid => 0, msg => <reason> }.  Checks that the chosen
	# observatory appears in the 'web_observatories' config, that the
	# record owner's username is in its authorised_users list, and that
	# an alphanumeric collection ID has been supplied.
	my ($self) = @_;
	my $repo = $self->repository;
	#export not set -> valid
	return { valid => 1 } unless $self->is_set('web_observatory_export');
	#export set to no -> valid
	return { valid => 1 } unless $self->value('web_observatory_export') eq 'yes';
	#validate other bits of meta
	my $wo_id = $self->value('web_observatory_id');
	my $wo_conf = $repo->config('web_observatories');
	#is the Web Observatory Configured?
	if (!$wo_conf->{$wo_id})
	{
		return {valid => 0, msg => 'Bad Web Observatory ID'}
	};
	#does the record owner have permission to do this?
	my $userid = $self->value('userid');
	my $user = $repo->dataset('user')->dataobj($userid);
	unless (
		$user
		&& $user->is_set('username')
		&& _in_array($user->value('username'), $wo_conf->{$wo_id}->{authorised_users}))
	{
		return { valid => 0, msg => 'Unauthorised User Account (based on who created the record)' };
	}
	return { valid => 0, msg => 'You must specify a collection within the observatory' } unless $self->is_set('web_observatory_collection');
	my $collection = $self->value('web_observatory_collection');
	if ($collection !~ m/^[a-zA-Z0-9]+$/)
	{
		return { valid => 0, msg => 'Collection IDs must be alphanumeric only' };
	}
	return { valid => 1 };
}
sub _in_array
{
	# True (1) if $val is string-equal to any element of $arrayref,
	# otherwise 0.
	my ($val, $arrayref) = @_;
	return (grep { $_ eq $val } @{$arrayref}) ? 1 : 0;
}
sub render_top_tag_cloud
{
	# Field render callback: render a 'top N' compound field (e.g. top
	# hashtags) as a tag cloud, with each item's font size scaled
	# logarithmically between $cloud_min% and $cloud_max% according to
	# its count.  Items are sorted case-insensitively by rendered label.
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	if (!(scalar @{$value}))
	{
		return $session->html_phrase('top_field_no_data');
	}
	my $fieldname = $field->name;
	my $cloud_min = 80;
	my $cloud_max = 200;
	my $range = $cloud_max - $cloud_min;
	#first find the highest and lowest counts so the sizes can be scaled
	my $highest = 0;
	my $lowest = undef;
	foreach my $v (@{$value})
	{
		$lowest = $v->{count} unless defined $lowest;
		$lowest = $v->{count} if $v->{count} < $lowest;
		$highest = $v->{count} if $v->{count} > $highest;
	}
	#BUGFIX: if every count is equal, log(1+0) is 0 and the scaling below
	#would divide by zero -- render everything at the minimum size instead
	my $denominator = log(1+($highest-$lowest));
	my $tags = [];
	foreach my $single_value (@{$value})
	{
		my $label = render_top_lhs($session, $fieldname, $single_value);
		my $size = $cloud_min;
		if ($denominator > 0)
		{
			$size = int( ( log(1+($single_value->{count} - $lowest)) / $denominator ) * $range ) + $cloud_min;
		}
		my $span = $session->make_element( "span", class => 'tag-cloud-item', style=>"font-size: $size\%" );
		$span->appendChild( $label );
		push @{$tags}, $span;
	}
	my $cloud = $session->xml->create_document_fragment;
	foreach my $tag (sort { lc(EPrints::Utils::tree_to_utf8($a)) cmp lc(EPrints::Utils::tree_to_utf8($b))} @{$tags})
	{
		$cloud->appendChild($tag);
		$cloud->appendChild($session->xml->create_text_node(' '));
	}
	return $cloud;
}
sub render_top_frequency_values
{
	# Field render callback: render label/value pairs as a table of
	# horizontal bars, each bar's width being the value as a percentage
	# of the highest value.
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	if (!(scalar @{$value}))
	{
		return $session->html_phrase('top_field_no_data');
	}
	#first find the highest to scale all others
	my $highest = 0;
	foreach (@{$value})
	{
		$highest = $_->{value} if $_->{value} > $highest;
	}
	#BUGFIX: guard against division by zero when every value is zero
	$highest = 1 unless $highest;
	my $table = $session->make_element('table', class=>"tweetstream_graph");
	foreach my $pair (@{$value})
	{
		my $tr = $session->make_element('tr');
		$table->appendChild($tr);
		my $th = $session->make_element('th');
		$tr->appendChild($th);
		$th->appendChild($session->make_text($pair->{label}));
		my $td = $session->make_element('td', class => "tweetstream_bar");
		my $width = int (($pair->{value} / $highest) * 100);
		my $div = $session->make_element('div', style => "width: $width%");
		$td->appendChild($div);
		$tr->appendChild($td);
		$td = $session->make_element('td');
		$td->appendChild($session->make_text($pair->{value}));
		$tr->appendChild($td);
	}
	return $table;
}
sub get_url
{
	# URL of this tweetstream -- simply its canonical URI.
	my ($self) = @_;
	return $self->uri;
}
sub render_newest_coordinates
{
	# Field render callback: embed the GoogleMap export of this
	# tweetstream in an iframe.
	my( $repo , $field , $value , $alllangs , $nolink , $object ) = @_;
	my $xml = $repo->xml;
	my $plugin = $repo->plugin('Export::TweetStream::GoogleMap');
	my $url = $plugin->dataobj_export_url($object);
	#make the URL protocol-relative (http://host/... -> //host/...) so it
	#loads under both http and https
	$url =~ s#^[^/]*//#//#;
	my $iframe = $xml->create_element('iframe', id => 'map-iframe', src => $url);
	return $iframe;
}
sub render_top_field
{
	# Field render callback: render a 'top N' compound field as a
	# multi-column table, one row per value.  The label (left-hand cell)
	# and count/details (right-hand cell) are delegated to render_top_lhs
	# and render_top_rhs; the column count comes from the
	# 'tweetstream_tops' config for this field.
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	if (!(scalar @{$value}))
	{
		return $session->html_phrase('top_field_no_data');
	}
	my $rows;
	my $fieldname = $field->name;
	foreach my $single_value (@{$value})
	{
		my $tr = $session->make_element('tr');
		my $td = $session->make_element('td');
		$tr->appendChild($td);
		$td->appendChild(render_top_lhs($session, $fieldname, $single_value));
		$td = $session->make_element('td');
		$td->appendChild(render_top_rhs($session, $fieldname, $single_value));
		$tr->appendChild($td);
		push @{$rows}, $tr;
	}
	return columned_table($session, $rows, $session->config('tweetstream_tops',$fieldname,'cols'));
}
sub render_top_lhs
{
	# Render the label ("left-hand side") for one entry of a 'top N'
	# field.  The keys present in $stuff depend on $fieldname:
	#   top_hashtags       -> link to the hashtag on Twitter
	#   top_urls_from_text -> link to the URL (display text truncated)
	#   top_from_users     -> profile image linking to the user
	#   top_tweetees       -> link to the mentioned user
	#   top_reply_tos / top_retweeted
	#                      -> the rendered tweet, or a link to the
	#                         uncollected tweet on Twitter
	my ($session, $fieldname, $stuff) = @_;
	if ($fieldname eq 'top_hashtags')
	{
		my $value = $stuff->{hashtag};
		my $max_render_len = $session->config('tweetstream_tops',$fieldname,'max_len');
		my $hash_stripped_tag = $value;
		$hash_stripped_tag =~ s/^#//;
		my $url = 'http://twitter.com/hashtag/' . URI::Escape::uri_escape($hash_stripped_tag);
		my $a = $session->make_element('a', href=>$url, title=>$stuff->{count});
		#BUGFIX: truncation was previously applied to a variable that was
		#never rendered; truncate the text that is actually displayed
		if (length $hash_stripped_tag > $max_render_len)
		{
			my $chars = $max_render_len - 3;
			$hash_stripped_tag = substr($hash_stripped_tag, 0, $chars) . '...';
		}
		$a->appendChild($session->make_text($hash_stripped_tag));
		return $a;
	};
	if ($fieldname eq 'top_urls_from_text')
	{
		my $value = $stuff->{'url_from_text'};
		my $max_render_len = $session->config('tweetstream_tops',$fieldname,'max_len');
		my $a = $session->make_element('a', href=>$value, title=>$value);
		if (length $value > $max_render_len)
		{
			my $chars = $max_render_len - 3;
			$value = substr($value, 0, $chars) . '...';
		}
		$a->appendChild($session->make_text($value));
		return $a;
	};
	if ($fieldname eq 'top_from_users')
	{
		my $base_url = 'http://twitter.com/';
		my $img_url = $stuff->{profile_image_url};
		my $user = $stuff->{from_user};
		my $a = $session->make_element('a', href=>$base_url . $user, title=> $user);
		$a->appendChild($session->make_element('img', height=>"48", width=>"48",src=>$img_url, onerror=>"this.onerror=''; this.src='/images/tweepository/egg.png';"));
		return $a;
	}
	if ($fieldname eq 'top_tweetees')
	{
		my $base_url = 'http://twitter.com/';
		my $user = $stuff->{tweetee};
		my $a = $session->make_element('a', href=>$base_url . $user, title=> $stuff->{count});
		$a->appendChild($session->make_text($user));
		return $a;
	}
	if (
		$fieldname eq 'top_reply_tos'
		|| $fieldname eq 'top_retweeted'
	)
	{
		my $twitterid = $stuff->{reply_to};
		$twitterid = $stuff->{retweeted} if $fieldname eq 'top_retweeted';
		my $tweet = EPrints::DataObj::Tweet::tweet_with_twitterid($session, $twitterid);
		if ($tweet)
		{
			return $tweet->render_span;
		}
		else
		{
			#we don't hold this tweet locally; link to it on Twitter
			my $xml = $session->xml;
			my $frag = $xml->create_document_fragment;
			my $url = 'http://twitter.com/statuses/' . $twitterid;
			my $link = $session->render_link($url);
			$link->appendChild($xml->create_text_node($twitterid));
			$frag->appendChild($xml->create_text_node('Uncollected tweet with id '));
			$frag->appendChild($link);
			return $frag;
		}
	}
	#we should never get here
	return $session->make_text("$fieldname unhandled in render_top_lhs\n");
}
sub render_top_rhs
{
	# Render the detail ("right-hand side") for one entry of a 'top N'
	# field: for top_from_users, a profile link plus "<n> tweets"; for
	# all other fields, just the count.
	my ($session, $fieldname, $stuff) = @_;
	if ($fieldname eq 'top_from_users')
	{
		my $frag = $session->make_doc_fragment;
		my $base_url = 'http://twitter.com/';
		my $img_url = $stuff->{profile_image_url};
		my $user = $stuff->{from_user};
		my $a = $session->make_element('a', href=>$base_url . $user, title=> $user);
		$a->appendChild($session->make_text($user));
		$frag->appendChild($a);
		$frag->appendChild($session->make_element('br'));
		$frag->appendChild($session->make_text($stuff->{count} . ' tweets'));
		return $frag;
	}
	else
	{
		return $session->make_text($stuff->{count});
	}
}
sub columned_table
{
	# Lay out $rows (an arrayref of <tr> nodes) as an outer single-row
	# table with $ncols cells, each cell holding an inner table with
	# roughly an equal share of the rows.
	my ($session, $rows, $ncols ) = @_;
	#BUGFIX: guard against missing/zero column config, which previously
	#caused a division (and modulo) by zero below
	$ncols = 1 if !$ncols || $ncols < 1;
	my $nitems = scalar @{$rows};
	my $col_len = POSIX::ceil( $nitems / $ncols );
	my $table = $session->make_element('table');
	my $tr = $session->make_element('tr');
	$table->appendChild($tr);
	my $inside_table;
	for( my $i=0; $i < $nitems; ++$i )
	{
		#start a new column every $col_len rows
		if( $i % $col_len == 0 )
		{
			my $td = $session->make_element('td', valign => 'top');
			$tr->appendChild($td);
			$inside_table = $session->make_element('table');
			$td->appendChild($inside_table);
		}
		$inside_table->appendChild($rows->[$i]);
	}
	return $table;
}
| ###################################################################### | |
| =pod | |
=item $tweetstream = EPrints::DataObj::TweetStream->new( $session, $tweetstreamid )
Load the tweetstream with the ID of $tweetstreamid from the database and return
it as an EPrints::DataObj::TweetStream object.
| =cut | |
| ###################################################################### | |
sub new
{
	my( $class, $session, $tweetid ) = @_;
	#bugfix -- force initialisation of the session's xhtml helper before
	#loading (presumably works around a lazy-initialisation problem; the
	#original comment said only "bugfix" -- TODO confirm)
	$session->{xhtml} = $session->xhtml;
	return $session->get_database->get_single(
		$session->get_repository->get_dataset( "tweetstream" ),
		$tweetid );
}
| ###################################################################### | |
| =pod | |
=item $tweetstream = EPrints::DataObj::TweetStream->new_from_data( $session, $data )
Construct a new EPrints::DataObj::TweetStream object based on the $data hash
reference of metadata.
Used to create an object from the data retrieved from the database.
| =cut | |
| ###################################################################### | |
sub new_from_data
{
	# Construct a tweetstream object from a hashref of metadata (as
	# retrieved from the database), forcing the 'tweetstream' dataset.
	my( $class, $session, $known ) = @_;
	return $class->SUPER::new_from_data(
		$session,
		$known,
		$session->get_repository->get_dataset( "tweetstream" ) );
}
| ###################################################################### | |
| # =pod | |
| # | |
| # =item $dataobj = EPrints::DataObj->create_from_data( $session, $data, $dataset ) | |
| # | |
| # Create a new object of this type in the database. | |
| # | |
| # $dataset is the dataset it will belong to. | |
| # | |
| # $data is the data structured as with new_from_data. | |
| # | |
| # =cut | |
| ###################################################################### | |
sub create_from_data
{
	# Create a new tweetstream record in the database from $data, run the
	# update triggers, and write back any field changes the triggers
	# made.  Also ensures the tweetstreamid counter is at least the new
	# object's ID.
	my( $class, $session, $data, $dataset ) = @_;
	my $new_tweetstream = $class->SUPER::create_from_data( $session, $data, $dataset );
	$new_tweetstream->update_triggers();
	if( scalar( keys %{$new_tweetstream->{changed}} ) > 0 )
	{
		# Remove empty slots in multiple fields
		$new_tweetstream->tidy;
		# Write the data to the database
		$session->get_database->update(
			$new_tweetstream->{dataset},
			$new_tweetstream->{data},
			$new_tweetstream->{changed} );
	}
	$session->get_database->counter_minimum( "tweetstreamid", $new_tweetstream->get_id );
	return $new_tweetstream;
}
| ###################################################################### | |
| =pod | |
=item $dataset = EPrints::DataObj::TweetStream->get_dataset_id
Returns the id of the L<EPrints::DataSet> object to which this record belongs.
| =cut | |
| ###################################################################### | |
sub get_dataset_id
{
	# The id of the EPrints::DataSet all tweetstream records belong to.
	# Called as a class method; arguments are ignored.
	return "tweetstream";
}
| ###################################################################### | |
| =pod | |
=item $defaults = EPrints::DataObj::TweetStream->get_defaults( $session, $data )
Return default values for this object based on the starting data.
| =cut | |
| ###################################################################### | |
| # inherits | |
| ###################################################################### | |
| =pod | |
=item $tweetstream->commit( [$force] )
| Write this object to the database. | |
| If $force isn't true then it only actually modifies the database | |
| if one or more fields have been changed. | |
| =cut | |
| ###################################################################### | |
sub commit
{
	# Write this tweetstream to the database (unless nothing has changed
	# and $force is false).  Before saving, fills in defaults: status
	# 'active', tweet_count 0, a title derived from the search string,
	# and start_date taken from the oldest cached tweet's created_at.
	my( $self, $force ) = @_;
	my $repo = $self->repository;
	$self->update_triggers();
	$self->set_value('status', 'active') if !$self->is_set('status');
	$self->set_value('tweet_count', 0) if !$self->is_set('tweet_count');
	if (!$self->is_set('title') && $self->is_set('search_string'))
	{
		#sensible default
		$self->set_value('title', 'Twitter Feed for ' . $self->value('search_string'));
	}
	if ($self->is_set('oldest_tweets'))
	{
		#oldest_tweets is stored in ascending order, so element 0 is the
		#very first tweet collected
		my $ids = $self->value('oldest_tweets');
		my $oldest_tweet = $self->repository->dataset('tweet')->dataobj($ids->[0]);
		if ($oldest_tweet)
		{
			my $t = $oldest_tweet->value('created_at');
			$self->set_value('start_date', $t); #I checked the code, you can set a Date field with a Time type, it strips the time off.
		}
	}
	if( !defined $self->{changed} || scalar( keys %{$self->{changed}} ) == 0 )
	{
		# don't do anything if there isn't anything to do
		return( 1 ) unless $force;
	}
	my $success = $self->SUPER::commit( $force );
	return( $success );
}
sub highest_twitterid
{
	# Twitter ID of the newest tweet cached in 'newest_tweets', or 0 if
	# unknown.  newest_tweets is stored in ascending twitterid order, so
	# the last element is the newest.
	my ($self) = @_;
	return 0 unless $self->is_set('newest_tweets');
	my $repo = $self->repository;
	my $tweet_ds = $repo->dataset('tweet');
	my $newest_tweetids = $self->value('newest_tweets');
	my $newest_tweet = $tweet_ds->dataobj($newest_tweetids->[-1]);
	return $newest_tweet->value('twitterid') if $newest_tweet;
	return 0;
}
sub highest_tweetid
{
	# Highest (eprints) tweet ID linked to this tweetstream, found with
	# direct SQL for speed on large streams; returns 0 when the stream
	# has no tweets.  The interpolated tweetstreamid is an internal
	# integer, not user input.
	my ($self) = @_;
	my $repo = $self->repository;
	my $db = $repo->database;
	my $sql = 'SELECT tweetid FROM tweet_tweetstreams WHERE tweetstreams = ' .
		$self->value('tweetstreamid') . ' ORDER BY tweetid DESC LIMIT 1';
	my $sth = $db->prepare( $sql );
	$sth->execute;
	#BUGFIX: fetchrow_arrayref returns undef when there are no rows; the
	#old unconditional ->[0] crashed on empty tweetstreams
	my $row = $sth->fetchrow_arrayref;
	return 0 unless $row;
	return $row->[0];
}
#how many tweets in this tweetstream. Optionally, specify a tweetid (not a twitterid) and we'll only count up to there
#Note that this took a minute to count up to 3 million!
sub count_with_query
{
	my ($self, $highest_tweetid) = @_;
	#if it's archived, don't count from the database -- there may be no data there
	#(guard against an unset status, which previously triggered an
	#uninitialised-value warning in the eq comparison)
	my $status = $self->value('status');
	if (defined $status && $status eq 'archived')
	{
		return $self->value('tweet_count');
	}
	my $repo = $self->repository;
	my $db = $repo->database;
	#interpolated values are internal integers, not user input
	my $sql = 'SELECT COUNT(*) ';
	$sql .= 'FROM tweet JOIN tweet_tweetstreams ON tweet.tweetid = tweet_tweetstreams.tweetid ';
	$sql .= 'WHERE tweet_tweetstreams.tweetstreams = ' . $self->value('tweetstreamid');
	if ($highest_tweetid)
	{
		$sql .= " AND tweet.tweetid <= $highest_tweetid";
	}
	my $sth = $db->prepare( $sql );
	$sth->execute;
	my $row = $sth->fetchrow_arrayref;
	return $row ? $row->[0] : 0;
}
#returns a page of tweets, or all of them if args not supplied
#order_field enables the retrieval of tweets by tweetid -- much faster for larger twitter feeds
sub tweets
{
	# Returns an EPrints::List of this stream's tweets ordered by
	# $order_field (default 'twitterid'), optionally limited to $limit
	# results with $order_field >= $lowest_id.
	my ($self, $limit, $lowest_id, $order_field) = @_;
	$order_field = 'twitterid' unless $order_field;
	my $ds = $self->repository->dataset('tweet');
	my $search = $ds->prepare_search(custom_order => $order_field);
	$search->add_field($ds->get_field('tweetstreams'), $self->id);
	$search->set_property('limit', $limit) if $limit;
	$search->add_field($ds->get_field($order_field), "$lowest_id-") if $lowest_id; #open-ended range: $lowest_id and up
	return $search->perform_search;
}
#returns the csv columns of a *Tweet* object, and the max multiplicity for this stream for each field
sub csv_cols
{
	# One entry per exported tweet field, in output order.  Simple fields
	# always occupy one column, coordinates two, and the multiple fields
	# use the stream-wide maxima recorded in the *_ncols fields (minimum
	# of one column each).
	my ($self) = @_;
	my @cols = map { { fieldname => $_, ncols => 1 } } qw(
		twitterid
		from_user
		from_user_id
		created_at
		text
		profile_image_url
		iso_language_code
		source
		text_enriched
	);
	push @cols, { fieldname => 'coordinates', ncols => 2 };
	foreach my $multiple_field (qw( tweetees hashtags urls_from_text ))
	{
		my $ncols = $self->get_value($multiple_field . '_ncols');
		push @cols, { fieldname => $multiple_field, ncols => ($ncols ? $ncols : 1) };
	}
	return \@cols;
}
| ###################################################################### | |
| =pod | |
| =item $success = $tweetstream->remove | |
| Remove this tweetstream from the database. | |
| =cut | |
| ###################################################################### | |
sub remove
{
	# Remove this tweetstream: detach every tweet from it in pages of
	# 1000 via Tweet::remove_from_tweetstream (paged so very large
	# streams don't load every tweet at once), then delete the
	# tweetstream record itself.  Returns true on success.
	my( $self ) = @_;
	my $success = 1;
	my $page_size = 1000;
	while (1)
	{
		#ordering by tweetid avoids a slow sort on big streams (see tweets())
		my $tweets = $self->tweets($page_size, 0, 'tweetid');
		last unless $tweets->count; #exit if there are no results returned
		$tweets->map( sub
		{
			my ($repo, $ds, $tweet, $tweetstream) = @_;
			$tweet->remove_from_tweetstream($self);
		}, $self);
	}
	# remove tweetstream record
	my $tweetstream_ds = $self->{session}->get_repository->get_dataset( "tweetstream" );
	$success = $success && $self->{session}->get_database->remove(
		$tweetstream_ds,
		$self->get_value( "tweetstreamid" ) );
	return( $success );
}
#a parallel list of tweet ids (due to a utf8 issue) will be rendered as the number of tweets.
sub render_tweetcount
{
	# Field render callback: render a list-of-tweet-ids field simply as
	# "<n> tweets".
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	my $xml = $session->xml;
	my $frag = $xml->create_document_fragment;
	$frag->appendChild($xml->create_text_node(scalar @{$value} . ' tweets'));
	return $frag;
}
sub render_tweet_field
{
	# Field render callback: render a list of tweet IDs as an ordered
	# list with one <li> per still-existing tweet; IDs of deleted tweets
	# are silently skipped.
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	my $xml = $session->xml;
	my $tweet_ds = $session->dataset('tweet');
	my $frag = $xml->create_document_fragment;
	my $ol = $xml->create_element('ol', class => 'tweets');
	$frag->appendChild($ol);
	foreach my $tweetid (@{$value})
	{
		my $tweet = $tweet_ds->dataobj($tweetid);
		next unless $tweet;
		$ol->appendChild($tweet->render_li);
	}
	return $frag;
}
sub render_tweet_list
{
	# Field render callback: render the stream's cached oldest tweets,
	# then -- if the stream is large enough for a separate newest_tweets
	# cache -- a 'torn paper' divider stating how many tweets are not
	# shown, followed by the cached newest tweets.
	my( $repository , $field , $value , $alllangs , $nolink , $object ) = @_;
	my $xml = $repository->xml;
	my $tweet_ds = $repository->dataset('tweet');
	my $frag = $xml->create_document_fragment;
	$frag->appendChild($object->render_value('oldest_tweets'));
	if ($object->is_set('newest_tweets')) #will only be set if we have more than n_oldest + n_newest tweets
	{
		my $n_oldest = $repository->config('tweetstream_tweet_renderopts','n_oldest');
		my $n_newest = $repository->config('tweetstream_tweet_renderopts','n_newest');
		my $span = $xml->create_element('span', style => "margin-top: 1em; margin-bottom: 1em;");
		$span->appendChild($xml->create_element('img', src=>"/images/tweepository/paper_tear-top.png", style=>"width: 480px"));
		$span->appendChild($xml->create_element('br'));
		$span->appendChild($repository->html_phrase('DataObj::Tweet/unshown_items', n=>$xml->create_text_node(($object->value('tweet_count') - ($n_oldest+$n_newest)))));
		$span->appendChild($xml->create_element('br'));
		$span->appendChild($xml->create_element('img', src=>"/images/tweepository/paper_tear-bottom.png", style=>"width: 480px"));
		$frag->appendChild($span);
		$frag->appendChild($object->render_value('newest_tweets'));
	}
	return $frag;
}
sub render_tools
{
	# Field render callback: build a two-row table of action icons (top
	# row) and action buttons (bottom row), one column per screen plugin
	# listed in the 'tweepository_tools_on_summary_page' config.  Plugins
	# that can't be loaded or that the current user may not view are
	# skipped.
	my( $session , $field , $value , $alllangs , $nolink , $object ) = @_;
	my $processor = EPrints::ScreenProcessor->new(
		session => $session,
		dataobj => $object,
		dataset => $object->dataset,
	);
	my $some_plugin = $session->plugin( "Screen", processor=>$processor );
	my $table = $session->make_element('table', class => 'tweepository_summary_page_actions' );
	my $icons_tr = $session->make_element('tr');
	my $buttons_tr = $session->make_element('tr');
	$table->appendChild($icons_tr);
	$table->appendChild($buttons_tr);
	my $tools = $session->config('tweepository_tools_on_summary_page');
	foreach my $screenid (@{$tools})
	{
		my $screen = $session->plugin(
			$screenid,
			processor => $processor,
		);
		next unless $screen;
		next unless $screen->can_be_viewed;
		#parameters passed through to the icon/button renderers; the
		#hidden fields identify this tweetstream to the target screen
		my $params = {
			screen_id => $screen->get_id,
			screen => $screen,
			hidden => {
				dataobj => $object->id,
				dataset => 'tweetstream'
			}
		};
		my $td = $session->make_element('td');
		$icons_tr->appendChild($td);
		$td->appendChild($screen->render_action_icon($params));
		$td = $session->make_element('td');
		$buttons_tr->appendChild($td);
		$td->appendChild($screen->render_action_button($params));
	}
	return $table;
}
# Build the CGI URL that invokes the given screen plugin against this
# tweetstream (screen id plus dataset/dataobj query parameters).
# Returns the URL as a plain string.
sub _screenid_to_url
{
	my ($self, $screenid) = @_;

	my $cgi_base = $self->{session}->get_repository->get_conf( "http_cgiurl" );
	return join('',
		$cgi_base,
		'/users/home?screen=', $screenid,
		'&dataset=tweetstream',
		'&dataobj=', $self->id,
	);
}
# Render the export menu for this tweetstream as an XHTML phrase wrapping a
# <ul> of export-plugin links.  Which plugin list and phrase are used depends
# on the stream's state:
#   - archived streams get the 'export_archived' phrase and no link list;
#   - streams over the export threshold get the restricted 'too_many' list;
#   - otherwise the normal summary-page export list is used.
#
# Fixes relative to the original:
#   - the threshold comparison now uses the undef-guarded $tweet_count
#     instead of re-reading the raw field value (avoids an uninitialized-
#     value warning on streams with no tweet_count);
#   - the archived branch returned immediately, so its assignments to
#     $plugin_list/$phrase_id (one of them via a typo'd config key,
#     '..._arhived') were dead code -- the short-circuit is now hoisted to
#     the top and the dead assignments removed;
#   - 'status' is guarded against undef before the string comparison.
sub render_exporters
{
	my ($self) = @_;
	my $repository = $self->repository;
	my $xml = $repository->xml;

	# Archived streams cannot be exported from the summary page.
	my $status = $self->value('status');
	if (defined $status && $status eq 'archived')
	{
		return $repository->html_phrase('TweetStream/export_archived');
	}

	my $tweet_count = $self->value('tweet_count');
	$tweet_count = 0 unless $tweet_count;

	my $threshold = $repository->config('tweepository_export_threshold');
	$threshold = 100000 unless $threshold;

	my $plugin_list = 'tweepository_exports_on_summary_page';
	my $phrase_id = 'TweetStream/export_menu';
	if ($tweet_count > $threshold)
	{
		# Very large streams only offer the restricted set of exporters.
		$plugin_list = 'tweepository_exports_on_summary_page_too_many';
		$phrase_id = 'TweetStream/export_too_many';
	}

	my $pluginids = $repository->config($plugin_list);
	my $export_ul = $xml->create_element('ul');
	foreach my $pluginid (@{$pluginids})
	{
		my $plugin = $repository->plugin($pluginid);
		next unless $plugin;	# skip exporters that are not installed

		my $li = $xml->create_element( "li" );
		my $url = $plugin->dataobj_export_url( $self );
		my $a = $repository->render_link( $url );
		$a->appendChild( $plugin->render_name );
		$li->appendChild( $a );
		$export_ul->appendChild( $li );
	}
	return ($repository->html_phrase($phrase_id, export_list => $export_ul));
}
# EPrints ownership hook: a tweetstream is owned by exactly one user, so
# ownership is a straight comparison of userids.  Returns 1 if
# $possible_owner owns this dataobj, 0 otherwise.
sub has_owner
{
	my( $self, $possible_owner ) = @_;

	return $possible_owner->get_value( "userid" ) == $self->get_value( "userid" ) ? 1 : 0;
}
# Collect the subset of this tweetstream's fields that are included in
# exports.  Only fields that are actually set are copied.  Returns a
# hashref of fieldname => value, or undef if none of the fields are set
# (the hashref is only autovivified on first assignment, matching the
# original behaviour).
sub data_for_export
{
	my ($self) = @_;

	my @export_fields = qw/ search_string top_hashtags top_from_users top_tweetees top_urls_from_text /;

	my $data;
	foreach my $fieldname (@export_fields)
	{
		next unless $self->is_set($fieldname);
		$data->{$fieldname} = $self->value($fieldname);
	}
	return $data;
}
| 1; | |
| } | |
{
# DataObj class for a queued tweetstream export request ('tsexport'
# dataset).  New requests default to 'pending' status, are stamped with the
# creation time, and record the requesting user when one is logged in.
package EPrints::DataObj::TweetStreamExport;

our @ISA = ( 'EPrints::DataObj' );

use strict;

# Return the id of the dataset this class manages.
sub get_dataset_id
{
	return "tsexport";
}

# Fill in default values for a newly created export request on top of the
# EPrints::DataObj defaults.  Returns the completed $data hashref.
#
# Fix: removed an unused 'my $userid;' declaration from the original.
sub get_defaults
{
	my( $class, $session, $data, $dataset ) = @_;

	$class->SUPER::get_defaults( $session, $data, $dataset );

	$data->{datestamp} = EPrints::Time::get_iso_timestamp();
	$data->{status} = 'pending';

	# Record the requester; may be absent (e.g. created from a script).
	my $user = $session->current_user;
	$data->{userid} = $user->id if defined $user;

	return $data;
}

1;
}